| // Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT. |
| |
| //go:build !purego |
| |
| #include "textflag.h" |
| |
| // func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64) |
| // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3 |
// gcmAesFinish builds a 16-byte block from pLen*8 and dLen*8, XORs it into the
// value stored at T, multiplies the result by the productTable entry at offsets
// 224/240, byte-reverses the product, XORs it with tagMask and writes it back to T.
//
// Register roles in this routine:
//   DI = productTable, SI = tagMask, DX = T, AX = pLen, CX = dLen
//   X0 = block being multiplied, X8/X9/X10 = low/high/middle partial products
//   X11 = scratch, X13 = tagMask value, X14 = gcmPoly, X15 = bswapMask
| TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40 |
| MOVQ productTable+0(FP), DI |
| MOVQ tagMask+8(FP), SI |
| MOVQ T+16(FP), DX |
| MOVQ pLen+24(FP), AX |
| MOVQ dLen+32(FP), CX |
| MOVOU (DX), X8 |
| MOVOU (SI), X13 |
| MOVOU bswapMask<>+0(SB), X15 |
| MOVOU gcmPoly<>+0(SB), X14 |
// Convert both byte counts to bit counts and pack them into one block:
// low qword = pLen*8, high qword = dLen*8. Then fold in the current T.
| SHLQ $0x03, AX |
| SHLQ $0x03, CX |
| MOVQ AX, X0 |
| PINSRQ $0x01, CX, X0 |
| PXOR X8, X0 |
// Three carry-less multiplies against the table entry at 224(DI) and its
// companion value at 240(DI): low*low -> X8, high*high -> X9, and
// (low^high of X0) * 240(DI) -> X10.
| MOVOU 224(DI), X8 |
| MOVOU 240(DI), X10 |
| MOVOU X8, X9 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X11, X10 |
// Recover the middle term and split it across the low (X8) and high (X9) halves.
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
// Two identical reduction steps using the gcmPoly constant, then merge the high half.
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
// Reverse the 16 bytes, apply tagMask, and store the result over T.
| PSHUFB X15, X8 |
| PXOR X13, X8 |
| MOVOU X8, (DX) |
| RET |
| |
// bswapMask is a 16-byte PSHUFB control vector whose byte i holds 15-i, so
// shuffling with it reverses the byte order of a whole XMM register.
// The table is spelled as four little-endian 32-bit words.
| DATA bswapMask<>+0(SB)/4, $0x0c0d0e0f |
| DATA bswapMask<>+4(SB)/4, $0x08090a0b |
| DATA bswapMask<>+8(SB)/4, $0x04050607 |
| DATA bswapMask<>+12(SB)/4, $0x00010203 |
| GLOBL bswapMask<>(SB), RODATA|NOPTR, $16 |
| |
// gcmPoly is the 16-byte constant used by the two-step reduction that follows
// every carry-less multiply in this file: low qword 0x1, high qword 0xc2<<56.
// The table is spelled as four little-endian 32-bit words.
| DATA gcmPoly<>+0(SB)/4, $0x00000001 |
| DATA gcmPoly<>+4(SB)/4, $0x00000000 |
| DATA gcmPoly<>+8(SB)/4, $0x00000000 |
| DATA gcmPoly<>+12(SB)/4, $0xc2000000 |
| GLOBL gcmPoly<>(SB), RODATA|NOPTR, $16 |
| |
| // func gcmAesInit(productTable *[256]byte, ks []uint32) |
| // Requires: AES, PCLMULQDQ, SSE2, SSSE3 |
// gcmAesInit fills productTable with eight 32-byte entries. Each entry holds a
// 16-byte value followed by the XOR of that value's two 64-bit halves. The
// first value (derived from the AES encryption of the key schedule's block 0)
// is stored at offset 224; each later value is the reduced product of the
// previous one with the first, stored 32 bytes lower, ending at offset 0.
//
// Register roles in this routine:
//   DI = productTable (decremented by 32 per loop iteration), SI = ks base
//   DX = ks_len/4 - 1, used only to pick the number of AES rounds
//   X0/X1 = first table value and its folded halves, X2/X3 = latest value and its fold
//   X11/X12/X13/X4 = scratch, X14 = gcmPoly, X15 = bswapMask, AX = loop counter
| TEXT ·gcmAesInit(SB), NOSPLIT, $0-32 |
| MOVQ productTable+0(FP), DI |
| MOVQ ks_base+8(FP), SI |
| MOVQ ks_len+16(FP), DX |
| SHRQ $0x02, DX |
| DECQ DX |
| MOVOU bswapMask<>+0(SB), X15 |
| MOVOU gcmPoly<>+0(SB), X14 |
| |
| // Encrypt block 0, with the AES key to generate the hash key H |
// X0 starts as round key 0, which equals an all-zero block XORed with that key.
| MOVOU (SI), X0 |
| MOVOU 16(SI), X11 |
| AESENC X11, X0 |
| MOVOU 32(SI), X11 |
| AESENC X11, X0 |
| MOVOU 48(SI), X11 |
| AESENC X11, X0 |
| MOVOU 64(SI), X11 |
| AESENC X11, X0 |
| MOVOU 80(SI), X11 |
| AESENC X11, X0 |
| MOVOU 96(SI), X11 |
| AESENC X11, X0 |
| MOVOU 112(SI), X11 |
| AESENC X11, X0 |
| MOVOU 128(SI), X11 |
| AESENC X11, X0 |
| MOVOU 144(SI), X11 |
| AESENC X11, X0 |
| MOVOU 160(SI), X11 |
// DX < 12: the key at 160(SI) is the final round key. The JE further down
// reuses the flags of this CMPQ (AESENC and MOVOU leave flags untouched):
// DX == 12 finishes with the key at 192(SI), anything larger with 224(SI).
// Presumably these are the AES-128/192/256 schedules - confirm against caller.
| CMPQ DX, $0x0c |
| JB initEncLast |
| AESENC X11, X0 |
| MOVOU 176(SI), X11 |
| AESENC X11, X0 |
| MOVOU 192(SI), X11 |
| JE initEncLast |
| AESENC X11, X0 |
| MOVOU 208(SI), X11 |
| AESENC X11, X0 |
| MOVOU 224(SI), X11 |
| |
| initEncLast: |
| AESENCLAST X11, X0 |
| PSHUFB X15, X0 |
| |
| // H * 2 |
// 128-bit left shift by one bit built from 32-bit lane shifts: X12 carries each
// lane's top bit into the next lane, and X11 is gcmPoly masked by the sign of
// the highest lane, i.e. the constant is XORed in only when a bit is shifted out.
| PSHUFD $0xff, X0, X11 |
| MOVOU X0, X12 |
| PSRAL $0x1f, X11 |
| PAND X14, X11 |
| PSRLL $0x1f, X12 |
| PSLLDQ $0x04, X12 |
| PSLLL $0x01, X0 |
| PXOR X11, X0 |
| PXOR X12, X0 |
| |
| // Karatsuba pre-computations |
// Store the value at 224(DI) and (high half XOR low half) at 240(DI).
| MOVOU X0, 224(DI) |
| PSHUFD $0x4e, X0, X1 |
| PXOR X0, X1 |
| MOVOU X1, 240(DI) |
| MOVOU X0, X2 |
| MOVOU X1, X3 |
| |
| // Now prepare powers of H and pre-computations for them |
// Seven iterations fill the remaining entries at offsets 192, 160, ..., 0.
| MOVQ $0x00000007, AX |
| |
| initLoop: |
// Multiply the previous value (X2, fold in X3) by the first value (X0, fold in X1).
| MOVOU X2, X11 |
| MOVOU X2, X12 |
| MOVOU X3, X13 |
| PCLMULQDQ $0x00, X0, X11 |
| PCLMULQDQ $0x11, X0, X12 |
| PCLMULQDQ $0x00, X1, X13 |
| PXOR X11, X13 |
| PXOR X12, X13 |
| MOVOU X13, X4 |
| PSLLDQ $0x08, X4 |
| PSRLDQ $0x08, X13 |
| PXOR X4, X11 |
| PXOR X13, X12 |
// Two reduction steps with gcmPoly; the reduced product ends up in X2.
| MOVOU X14, X2 |
| PCLMULQDQ $0x01, X11, X2 |
| PSHUFD $0x4e, X11, X11 |
| PXOR X2, X11 |
| MOVOU X14, X2 |
| PCLMULQDQ $0x01, X11, X2 |
| PSHUFD $0x4e, X11, X11 |
| PXOR X11, X2 |
| PXOR X12, X2 |
| MOVOU X2, 192(DI) |
| PSHUFD $0x4e, X2, X3 |
| PXOR X2, X3 |
| MOVOU X3, 208(DI) |
// LEAQ does not modify flags, so JNE still tests the result of DECQ.
| DECQ AX |
| LEAQ -32(DI), DI |
| JNE initLoop |
| RET |
| |
| // func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte) |
| // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3 |
// gcmAesData hashes data with the multipliers in productTable, starting from an
// all-zero accumulator, and stores the 16-byte result at T. The previous
// contents of T are not read.
//
// Input is consumed in 128-byte groups (eight blocks, one table entry each),
// then in 16-byte blocks, then as one zero-padded partial block. A 13-byte
// input takes a dedicated path that loads it with three instructions.
//
// Register roles in this routine:
//   DI = productTable, SI = data cursor, DX = bytes remaining, CX = T
//   X8/X9/X10 = low/high/middle accumulators (X8 also carries the running hash)
//   X0-X7 = input blocks, X11/X12/X13 = scratch or cached table entry
//   X14 = gcmPoly, X15 = bswapMask
| TEXT ·gcmAesData(SB), NOSPLIT, $0-40 |
| MOVQ productTable+0(FP), DI |
| MOVQ data_base+8(FP), SI |
| MOVQ data_len+16(FP), DX |
| MOVQ T+32(FP), CX |
| PXOR X8, X8 |
| MOVOU bswapMask<>+0(SB), X15 |
| MOVOU gcmPoly<>+0(SB), X14 |
// Empty input: store the zero accumulator and return.
| TESTQ DX, DX |
| JEQ dataBail |
// Exactly 13 bytes: use the fast path (the label name suggests this is the
// TLS additional-data size - not verifiable from this file).
| CMPQ DX, $0x0d |
| JE dataTLS |
| CMPQ DX, $0x80 |
| JB startSinglesLoop |
| JMP dataOctaLoop |
| |
| dataTLS: |
// Load 8 + 4 + 1 bytes into a zeroed X0, clear the remaining count so the
// loop after dataMul terminates, and hash the block.
| MOVOU 224(DI), X12 |
| MOVOU 240(DI), X13 |
| PXOR X0, X0 |
| MOVQ (SI), X0 |
| PINSRD $0x02, 8(SI), X0 |
| PINSRB $0x0c, 12(SI), X0 |
| XORQ DX, DX |
| JMP dataMul |
| |
| dataOctaLoop: |
// Eight blocks per iteration. Block k is multiplied by the table entry at
// offset 32*k (with its folded companion at 32*k+16); the partial products are
// summed into X8/X9/X10 and reduced once at the end of the iteration.
| CMPQ DX, $0x80 |
| JB startSinglesLoop |
| SUBQ $0x80, DX |
| MOVOU (SI), X0 |
| MOVOU 16(SI), X1 |
| MOVOU 32(SI), X2 |
| MOVOU 48(SI), X3 |
| MOVOU 64(SI), X4 |
| MOVOU 80(SI), X5 |
| MOVOU 96(SI), X6 |
| MOVOU 112(SI), X7 |
| LEAQ 128(SI), SI |
| PSHUFB X15, X0 |
| PSHUFB X15, X1 |
| PSHUFB X15, X2 |
| PSHUFB X15, X3 |
| PSHUFB X15, X4 |
| PSHUFB X15, X5 |
| PSHUFB X15, X6 |
| PSHUFB X15, X7 |
// The running hash is folded into the first block only.
| PXOR X8, X0 |
| MOVOU (DI), X8 |
| MOVOU 16(DI), X10 |
| MOVOU X8, X9 |
| PSHUFD $0x4e, X0, X12 |
| PXOR X0, X12 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PCLMULQDQ $0x00, X12, X10 |
| MOVOU 32(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X1, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X1, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X1, X12 |
| PXOR X12, X1 |
| MOVOU 48(DI), X12 |
| PCLMULQDQ $0x00, X1, X12 |
| PXOR X12, X10 |
| MOVOU 64(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X2, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X2, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X2, X12 |
| PXOR X12, X2 |
| MOVOU 80(DI), X12 |
| PCLMULQDQ $0x00, X2, X12 |
| PXOR X12, X10 |
| MOVOU 96(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X3, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X3, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X3, X12 |
| PXOR X12, X3 |
| MOVOU 112(DI), X12 |
| PCLMULQDQ $0x00, X3, X12 |
| PXOR X12, X10 |
| MOVOU 128(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X4, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X4, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X4, X12 |
| PXOR X12, X4 |
| MOVOU 144(DI), X12 |
| PCLMULQDQ $0x00, X4, X12 |
| PXOR X12, X10 |
| MOVOU 160(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X5, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X5, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X5, X12 |
| PXOR X12, X5 |
| MOVOU 176(DI), X12 |
| PCLMULQDQ $0x00, X5, X12 |
| PXOR X12, X10 |
| MOVOU 192(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X6, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X6, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X6, X12 |
| PXOR X12, X6 |
| MOVOU 208(DI), X12 |
| PCLMULQDQ $0x00, X6, X12 |
| PXOR X12, X10 |
| MOVOU 224(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X7, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X7, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X7, X12 |
| PXOR X12, X7 |
| MOVOU 240(DI), X12 |
| PCLMULQDQ $0x00, X7, X12 |
| PXOR X12, X10 |
// Combine the middle term into the low/high halves, then reduce twice with gcmPoly.
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| JMP dataOctaLoop |
| |
| startSinglesLoop: |
// Cache the table entry at 224/240(DI) in X12/X13 for the one-block multiply.
| MOVOU 224(DI), X12 |
| MOVOU 240(DI), X13 |
| |
| dataSinglesLoop: |
| CMPQ DX, $0x10 |
| JB dataEnd |
| SUBQ $0x10, DX |
| MOVOU (SI), X0 |
| |
| dataMul: |
// Shared by the full-block loop, the 13-byte path and the partial tail:
// X8 = reduce((byte-reversed X0 XOR X8) * cached table entry).
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X12, X8 |
| MOVOU X13, X10 |
| MOVOU X12, X9 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PCLMULQDQ $0x00, X11, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| LEAQ 16(SI), SI |
| JMP dataSinglesLoop |
| |
| dataEnd: |
// Fewer than 16 bytes remain. If any, gather them into a zeroed X0 one byte at
// a time, walking backwards from the last byte so they land in memory order.
| TESTQ DX, DX |
| JEQ dataBail |
| PXOR X0, X0 |
| LEAQ -1(SI)(DX*1), SI |
| |
| dataLoadLoop: |
| PSLLDQ $0x01, X0 |
| PINSRB $0x00, (SI), X0 |
| LEAQ -1(SI), SI |
| DECQ DX |
| JNE dataLoadLoop |
// DX is now zero, so after dataMul the loop falls through to dataBail.
| JMP dataMul |
| |
| dataBail: |
| MOVOU X8, (CX) |
| RET |
| |
| // func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32) |
| // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3 |
// gcmAesEnc encrypts src into dst in counter mode and folds every ciphertext
// block into the hash value loaded from T; the updated hash is stored back to T.
// The first counter block used is ctr with its last 32-bit big-endian word
// incremented by one. No instruction in this routine stores through ctr.
//
// Stack frame (256 bytes):
//   0..127(SP)    eight byte-reversed ciphertext blocks waiting to be hashed
//   128..255(SP)  eight counter blocks, each already XORed with round key 0
//
// Register roles in this routine:
//   DI = productTable, DX = dst cursor, SI = src cursor, R9 = bytes remaining
//   CX = ctr, R8 = T, AX = round keys, R13 = ks_len/4 - 1 (selects round count)
//   R10 = 32-bit counter value (byte-swapped from ctr[12:16]); reused as the
//         andMask base pointer in the tail
//   R12 = byte-swapped last dword of round key 0, R11 = scratch
//   X0-X7 = blocks being encrypted, X8/X9/X10 = low/high/middle hash accumulators
//   X11/X12/X13 = scratch, X14 = gcmPoly, X15 = bswapMask
| TEXT ·gcmAesEnc(SB), $256-96 |
| MOVQ productTable+0(FP), DI |
| MOVQ dst_base+8(FP), DX |
| MOVQ src_base+32(FP), SI |
| MOVQ src_len+40(FP), R9 |
| MOVQ ctr+56(FP), CX |
| MOVQ T+64(FP), R8 |
| MOVQ ks_base+72(FP), AX |
| MOVQ ks_len+80(FP), R13 |
| SHRQ $0x02, R13 |
| DECQ R13 |
| MOVOU bswapMask<>+0(SB), X15 |
| MOVOU gcmPoly<>+0(SB), X14 |
| MOVOU (R8), X8 |
| PXOR X9, X9 |
| PXOR X10, X10 |
| MOVOU (CX), X0 |
| MOVL 12(CX), R10 |
| MOVOU (AX), X11 |
| MOVL 12(AX), R12 |
| BSWAPL R10 |
| BSWAPL R12 |
// X11 = ctr XOR round key 0; this template is copied into every counter slot.
| PXOR X0, X11 |
| MOVOU X11, 128(SP) |
// Counter-slot update idiom, repeated throughout this routine: advance R10 and
// overwrite the slot's last dword with bswap(R10 XOR R12), which equals the
// big-endian counter word XORed with round key 0's last dword.
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 140(SP) |
| CMPQ R9, $0x80 |
| JB gcmAesEncSingles |
| SUBQ $0x80, R9 |
| |
| // We have at least 8 blocks to encrypt, prepare the rest of the counters |
| MOVOU X11, 144(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 156(SP) |
| MOVOU X11, 160(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 172(SP) |
| MOVOU X11, 176(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 188(SP) |
| MOVOU X11, 192(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 204(SP) |
| MOVOU X11, 208(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 220(SP) |
| MOVOU X11, 224(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 236(SP) |
| MOVOU X11, 240(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 252(SP) |
// First batch: nine unconditional AES rounds on all eight blocks. After each of
// the first eight rounds one counter slot is refreshed for the following batch.
| MOVOU 128(SP), X0 |
| MOVOU 144(SP), X1 |
| MOVOU 160(SP), X2 |
| MOVOU 176(SP), X3 |
| MOVOU 192(SP), X4 |
| MOVOU 208(SP), X5 |
| MOVOU 224(SP), X6 |
| MOVOU 240(SP), X7 |
| MOVOU 16(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 140(SP) |
| MOVOU 32(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 156(SP) |
| MOVOU 48(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 172(SP) |
| MOVOU 64(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 188(SP) |
| MOVOU 80(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 204(SP) |
| MOVOU 96(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 220(SP) |
| MOVOU 112(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 236(SP) |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 252(SP) |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 160(AX), X11 |
// Round-count selection: R13 < 12 finishes with the key at 160(AX). The JE
// below reuses the flags of this CMPQ (AESENC and MOVOU leave flags untouched):
// R13 == 12 finishes with 192(AX), anything larger with 224(AX).
| CMPQ R13, $0x0c |
| JB encLast1 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 192(AX), X11 |
| JE encLast1 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 224(AX), X11 |
| |
| encLast1: |
// Final round, XOR the keystream with 128 bytes of src, write the ciphertext to
// dst, and park byte-reversed copies at 0..127(SP). Nothing has been hashed yet;
// the running hash in X8 is folded into the first parked block.
| AESENCLAST X11, X0 |
| AESENCLAST X11, X1 |
| AESENCLAST X11, X2 |
| AESENCLAST X11, X3 |
| AESENCLAST X11, X4 |
| AESENCLAST X11, X5 |
| AESENCLAST X11, X6 |
| AESENCLAST X11, X7 |
| MOVOU (SI), X11 |
| PXOR X11, X0 |
| MOVOU 16(SI), X11 |
| PXOR X11, X1 |
| MOVOU 32(SI), X11 |
| PXOR X11, X2 |
| MOVOU 48(SI), X11 |
| PXOR X11, X3 |
| MOVOU 64(SI), X11 |
| PXOR X11, X4 |
| MOVOU 80(SI), X11 |
| PXOR X11, X5 |
| MOVOU 96(SI), X11 |
| PXOR X11, X6 |
| MOVOU 112(SI), X11 |
| PXOR X11, X7 |
| MOVOU X0, (DX) |
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X1, 16(DX) |
| PSHUFB X15, X1 |
| MOVOU X2, 32(DX) |
| PSHUFB X15, X2 |
| MOVOU X3, 48(DX) |
| PSHUFB X15, X3 |
| MOVOU X4, 64(DX) |
| PSHUFB X15, X4 |
| MOVOU X5, 80(DX) |
| PSHUFB X15, X5 |
| MOVOU X6, 96(DX) |
| PSHUFB X15, X6 |
| MOVOU X7, 112(DX) |
| PSHUFB X15, X7 |
| MOVOU X0, (SP) |
| MOVOU X1, 16(SP) |
| MOVOU X2, 32(SP) |
| MOVOU X3, 48(SP) |
| MOVOU X4, 64(SP) |
| MOVOU X5, 80(SP) |
| MOVOU X6, 96(SP) |
| MOVOU X7, 112(SP) |
| LEAQ 128(SI), SI |
| LEAQ 128(DX), DX |
| |
| gcmAesEncOctetsLoop: |
// Each iteration encrypts eight fresh counter blocks while hashing the eight
// ciphertext blocks parked at 0..127(SP) by the previous batch. AES rounds and
// carry-less multiplies are interleaved instruction by instruction.
| CMPQ R9, $0x80 |
| JB gcmAesEncOctetsEnd |
| SUBQ $0x80, R9 |
| MOVOU 128(SP), X0 |
| MOVOU 144(SP), X1 |
| MOVOU 160(SP), X2 |
| MOVOU 176(SP), X3 |
| MOVOU 192(SP), X4 |
| MOVOU 208(SP), X5 |
| MOVOU 224(SP), X6 |
| MOVOU 240(SP), X7 |
// Parked block 0 times the table entry at 0/16(DI) initialises X8/X9/X10.
| MOVOU (SP), X11 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X11, X12 |
| MOVOU (DI), X8 |
| MOVOU 16(DI), X10 |
| MOVOU X8, X9 |
| PCLMULQDQ $0x00, X12, X10 |
| PCLMULQDQ $0x00, X11, X8 |
| PCLMULQDQ $0x11, X11, X9 |
// Rounds 1-7: AES round k on all eight blocks, interleaved with multiplying
// parked block k by the table entry at 32k/32k+16(DI); then counter slot k-1 is
// refreshed for the next batch.
| MOVOU 16(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 32(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 16(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 48(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 140(SP) |
| MOVOU 32(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 64(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 32(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 80(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 156(SP) |
| MOVOU 48(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 96(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 48(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 112(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 172(SP) |
| MOVOU 64(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 128(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 64(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 144(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 188(SP) |
| MOVOU 80(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 160(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 80(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 176(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 204(SP) |
| MOVOU 96(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 192(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 96(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 208(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 220(SP) |
| MOVOU 112(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 224(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 112(SP), X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 240(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 236(SP) |
// Round 8 (AES only), refresh the last counter slot, then split the middle term
// and run the two reduction steps with AES round 9 scheduled between them.
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 252(SP) |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| MOVOU 160(AX), X11 |
// Same round-count selection as before encLast1; JE reuses this CMPQ's flags.
| CMPQ R13, $0x0c |
| JB encLast2 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 192(AX), X11 |
| JE encLast2 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 224(AX), X11 |
| |
| encLast2: |
// Final round, XOR with src, write dst, and park the byte-reversed ciphertext
// for the next iteration; the running hash is folded into parked block 0.
| AESENCLAST X11, X0 |
| AESENCLAST X11, X1 |
| AESENCLAST X11, X2 |
| AESENCLAST X11, X3 |
| AESENCLAST X11, X4 |
| AESENCLAST X11, X5 |
| AESENCLAST X11, X6 |
| AESENCLAST X11, X7 |
| MOVOU (SI), X11 |
| PXOR X11, X0 |
| MOVOU 16(SI), X11 |
| PXOR X11, X1 |
| MOVOU 32(SI), X11 |
| PXOR X11, X2 |
| MOVOU 48(SI), X11 |
| PXOR X11, X3 |
| MOVOU 64(SI), X11 |
| PXOR X11, X4 |
| MOVOU 80(SI), X11 |
| PXOR X11, X5 |
| MOVOU 96(SI), X11 |
| PXOR X11, X6 |
| MOVOU 112(SI), X11 |
| PXOR X11, X7 |
| MOVOU X0, (DX) |
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X1, 16(DX) |
| PSHUFB X15, X1 |
| MOVOU X2, 32(DX) |
| PSHUFB X15, X2 |
| MOVOU X3, 48(DX) |
| PSHUFB X15, X3 |
| MOVOU X4, 64(DX) |
| PSHUFB X15, X4 |
| MOVOU X5, 80(DX) |
| PSHUFB X15, X5 |
| MOVOU X6, 96(DX) |
| PSHUFB X15, X6 |
| MOVOU X7, 112(DX) |
| PSHUFB X15, X7 |
| MOVOU X0, (SP) |
| MOVOU X1, 16(SP) |
| MOVOU X2, 32(SP) |
| MOVOU X3, 48(SP) |
| MOVOU X4, 64(SP) |
| MOVOU X5, 80(SP) |
| MOVOU X6, 96(SP) |
| MOVOU X7, 112(SP) |
| LEAQ 128(SI), SI |
| LEAQ 128(DX), DX |
| JMP gcmAesEncOctetsLoop |
| |
| gcmAesEncOctetsEnd: |
// Hash the last eight parked ciphertext blocks (no encryption left to overlap).
| MOVOU (SP), X11 |
| MOVOU (DI), X8 |
| MOVOU 16(DI), X10 |
| MOVOU X8, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X11, X12 |
| PCLMULQDQ $0x00, X11, X8 |
| PCLMULQDQ $0x11, X11, X9 |
| PCLMULQDQ $0x00, X12, X10 |
| MOVOU 16(SP), X11 |
| MOVOU 32(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 48(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 32(SP), X11 |
| MOVOU 64(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 80(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 48(SP), X11 |
| MOVOU 96(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 112(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 64(SP), X11 |
| MOVOU 128(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 144(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 80(SP), X11 |
| MOVOU 160(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 176(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 96(SP), X11 |
| MOVOU 192(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 208(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| MOVOU 112(SP), X11 |
| MOVOU 224(DI), X12 |
| MOVOU X12, X13 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X13, X9 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X12, X11 |
| MOVOU 240(DI), X12 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| TESTQ R9, R9 |
| JE gcmAesEncDone |
// All eight slots were refreshed for a batch that will not run, so R10 matches
// the last slot. Step back by 7 so it matches slot 0 at 128(SP), which is the
// next counter block the single-block loop will use.
| SUBQ $0x07, R10 |
| |
| gcmAesEncSingles: |
// Cache round keys 1-7 in X1-X7 and the table entry at 224(DI) in X13.
| MOVOU 16(AX), X1 |
| MOVOU 32(AX), X2 |
| MOVOU 48(AX), X3 |
| MOVOU 64(AX), X4 |
| MOVOU 80(AX), X5 |
| MOVOU 96(AX), X6 |
| MOVOU 112(AX), X7 |
| MOVOU 224(DI), X13 |
| |
| gcmAesEncSinglesLoop: |
// One full block per iteration: take the counter block from slot 0, refresh
// the slot for the next block, encrypt, XOR with src, store, then hash.
| CMPQ R9, $0x10 |
| JB gcmAesEncTail |
| SUBQ $0x10, R9 |
| MOVOU 128(SP), X0 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 140(SP) |
| AESENC X1, X0 |
| AESENC X2, X0 |
| AESENC X3, X0 |
| AESENC X4, X0 |
| AESENC X5, X0 |
| AESENC X6, X0 |
| AESENC X7, X0 |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| MOVOU 160(AX), X11 |
| CMPQ R13, $0x0c |
| JB encLast3 |
| AESENC X11, X0 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| MOVOU 192(AX), X11 |
| JE encLast3 |
| AESENC X11, X0 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| MOVOU 224(AX), X11 |
| |
| encLast3: |
| AESENCLAST X11, X0 |
| MOVOU (SI), X11 |
| PXOR X11, X0 |
| MOVOU X0, (DX) |
// X8 = reduce((byte-reversed ciphertext XOR X8) * table entry at 224/240(DI)).
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X13, X8 |
| MOVOU X13, X9 |
| MOVOU 240(DI), X10 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PCLMULQDQ $0x00, X11, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| LEAQ 16(SI), SI |
| LEAQ 16(DX), DX |
| JMP gcmAesEncSinglesLoop |
| |
| gcmAesEncTail: |
// Partial final block (1-15 bytes): produce one more keystream block from
// slot 0. The counter is not advanced again because nothing follows.
| TESTQ R9, R9 |
| JE gcmAesEncDone |
| MOVOU 128(SP), X0 |
| AESENC X1, X0 |
| AESENC X2, X0 |
| AESENC X3, X0 |
| AESENC X4, X0 |
| AESENC X5, X0 |
| AESENC X6, X0 |
| AESENC X7, X0 |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| MOVOU 160(AX), X11 |
| CMPQ R13, $0x0c |
| JB encLast4 |
| AESENC X11, X0 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| MOVOU 192(AX), X11 |
| JE encLast4 |
| AESENC X11, X0 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| MOVOU 224(AX), X11 |
| |
| encLast4: |
// X11 = keystream. Point SI at the last source byte and fetch the andMask row
// for R9 bytes (row index R9-1, 16 bytes per row) into X12.
| AESENCLAST X11, X0 |
| MOVOU X0, X11 |
| LEAQ -1(SI)(R9*1), SI |
| MOVQ R9, R11 |
| SHLQ $0x04, R11 |
| LEAQ andMask<>+0(SB), R10 |
| MOVOU -16(R10)(R11*1), X12 |
| PXOR X0, X0 |
| |
| ptxLoadLoop: |
// Gather the remaining source bytes back to front into a zeroed X0 so that no
// byte beyond the end of src is read.
| PSLLDQ $0x01, X0 |
| PINSRB $0x00, (SI), X0 |
| LEAQ -1(SI), SI |
| DECQ R9 |
| JNE ptxLoadLoop |
// XOR with the keystream and clear the bytes past the message end.
// NOTE(review): the store below writes a full 16 bytes at dst even though fewer
// are valid; this assumes dst has that much room - confirm against the caller.
| PXOR X11, X0 |
| PAND X12, X0 |
| MOVOU X0, (DX) |
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X13, X8 |
| MOVOU X13, X9 |
| MOVOU 240(DI), X10 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PCLMULQDQ $0x00, X11, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| |
| gcmAesEncDone: |
// Store the updated hash back to T.
| MOVOU X8, (R8) |
| RET |
| |
// andMask holds fifteen 16-byte rows. Row k (1 <= k <= 15) starts at offset
// 16*(k-1) and has its k lowest-addressed bytes set to 0xff and the rest zero.
// gcmAesEnc indexes it with the length of a partial final block to clear the
// bytes past the end of the message. Each row is spelled as four little-endian
// 32-bit words.
| DATA andMask<>+0(SB)/4, $0x000000ff |
| DATA andMask<>+4(SB)/4, $0x00000000 |
| DATA andMask<>+8(SB)/4, $0x00000000 |
| DATA andMask<>+12(SB)/4, $0x00000000 |
| DATA andMask<>+16(SB)/4, $0x0000ffff |
| DATA andMask<>+20(SB)/4, $0x00000000 |
| DATA andMask<>+24(SB)/4, $0x00000000 |
| DATA andMask<>+28(SB)/4, $0x00000000 |
| DATA andMask<>+32(SB)/4, $0x00ffffff |
| DATA andMask<>+36(SB)/4, $0x00000000 |
| DATA andMask<>+40(SB)/4, $0x00000000 |
| DATA andMask<>+44(SB)/4, $0x00000000 |
| DATA andMask<>+48(SB)/4, $0xffffffff |
| DATA andMask<>+52(SB)/4, $0x00000000 |
| DATA andMask<>+56(SB)/4, $0x00000000 |
| DATA andMask<>+60(SB)/4, $0x00000000 |
| DATA andMask<>+64(SB)/4, $0xffffffff |
| DATA andMask<>+68(SB)/4, $0x000000ff |
| DATA andMask<>+72(SB)/4, $0x00000000 |
| DATA andMask<>+76(SB)/4, $0x00000000 |
| DATA andMask<>+80(SB)/4, $0xffffffff |
| DATA andMask<>+84(SB)/4, $0x0000ffff |
| DATA andMask<>+88(SB)/4, $0x00000000 |
| DATA andMask<>+92(SB)/4, $0x00000000 |
| DATA andMask<>+96(SB)/4, $0xffffffff |
| DATA andMask<>+100(SB)/4, $0x00ffffff |
| DATA andMask<>+104(SB)/4, $0x00000000 |
| DATA andMask<>+108(SB)/4, $0x00000000 |
| DATA andMask<>+112(SB)/4, $0xffffffff |
| DATA andMask<>+116(SB)/4, $0xffffffff |
| DATA andMask<>+120(SB)/4, $0x00000000 |
| DATA andMask<>+124(SB)/4, $0x00000000 |
| DATA andMask<>+128(SB)/4, $0xffffffff |
| DATA andMask<>+132(SB)/4, $0xffffffff |
| DATA andMask<>+136(SB)/4, $0x000000ff |
| DATA andMask<>+140(SB)/4, $0x00000000 |
| DATA andMask<>+144(SB)/4, $0xffffffff |
| DATA andMask<>+148(SB)/4, $0xffffffff |
| DATA andMask<>+152(SB)/4, $0x0000ffff |
| DATA andMask<>+156(SB)/4, $0x00000000 |
| DATA andMask<>+160(SB)/4, $0xffffffff |
| DATA andMask<>+164(SB)/4, $0xffffffff |
| DATA andMask<>+168(SB)/4, $0x00ffffff |
| DATA andMask<>+172(SB)/4, $0x00000000 |
| DATA andMask<>+176(SB)/4, $0xffffffff |
| DATA andMask<>+180(SB)/4, $0xffffffff |
| DATA andMask<>+184(SB)/4, $0xffffffff |
| DATA andMask<>+188(SB)/4, $0x00000000 |
| DATA andMask<>+192(SB)/4, $0xffffffff |
| DATA andMask<>+196(SB)/4, $0xffffffff |
| DATA andMask<>+200(SB)/4, $0xffffffff |
| DATA andMask<>+204(SB)/4, $0x000000ff |
| DATA andMask<>+208(SB)/4, $0xffffffff |
| DATA andMask<>+212(SB)/4, $0xffffffff |
| DATA andMask<>+216(SB)/4, $0xffffffff |
| DATA andMask<>+220(SB)/4, $0x0000ffff |
| DATA andMask<>+224(SB)/4, $0xffffffff |
| DATA andMask<>+228(SB)/4, $0xffffffff |
| DATA andMask<>+232(SB)/4, $0xffffffff |
| DATA andMask<>+236(SB)/4, $0x00ffffff |
| GLOBL andMask<>(SB), RODATA|NOPTR, $240 |
| |
| // func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32) |
| // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3 |
| TEXT ·gcmAesDec(SB), $128-96 |
| MOVQ productTable+0(FP), DI |
| MOVQ dst_base+8(FP), SI |
| MOVQ src_base+32(FP), DX |
| MOVQ src_len+40(FP), R9 |
| MOVQ ctr+56(FP), CX |
| MOVQ T+64(FP), R8 |
| MOVQ ks_base+72(FP), AX |
| MOVQ ks_len+80(FP), R13 |
| SHRQ $0x02, R13 |
| DECQ R13 |
| MOVOU bswapMask<>+0(SB), X15 |
| MOVOU gcmPoly<>+0(SB), X14 |
| MOVOU (R8), X8 |
| PXOR X9, X9 |
| PXOR X10, X10 |
| MOVOU (CX), X0 |
| MOVL 12(CX), R10 |
| MOVOU (AX), X11 |
| MOVL 12(AX), R12 |
| BSWAPL R10 |
| BSWAPL R12 |
| PXOR X0, X11 |
| MOVOU X11, (SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 12(SP) |
| CMPQ R9, $0x80 |
| JB gcmAesDecSingles |
| MOVOU X11, 16(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 28(SP) |
| MOVOU X11, 32(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 44(SP) |
| MOVOU X11, 48(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 60(SP) |
| MOVOU X11, 64(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 76(SP) |
| MOVOU X11, 80(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 92(SP) |
| MOVOU X11, 96(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 108(SP) |
| MOVOU X11, 112(SP) |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 124(SP) |
| |
| gcmAesDecOctetsLoop: |
| CMPQ R9, $0x80 |
| JB gcmAesDecEndOctets |
| SUBQ $0x80, R9 |
| MOVOU (SP), X0 |
| MOVOU 16(SP), X1 |
| MOVOU 32(SP), X2 |
| MOVOU 48(SP), X3 |
| MOVOU 64(SP), X4 |
| MOVOU 80(SP), X5 |
| MOVOU 96(SP), X6 |
| MOVOU 112(SP), X7 |
| MOVOU (DX), X11 |
| PSHUFB X15, X11 |
| PXOR X8, X11 |
| PSHUFD $0x4e, X11, X12 |
| PXOR X11, X12 |
| MOVOU (DI), X8 |
| MOVOU 16(DI), X10 |
| MOVOU X8, X9 |
| PCLMULQDQ $0x00, X12, X10 |
| PCLMULQDQ $0x00, X11, X8 |
| PCLMULQDQ $0x11, X11, X9 |
| MOVOU 16(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 32(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 16(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 48(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 12(SP) |
| MOVOU 32(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 64(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 32(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 80(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 28(SP) |
| MOVOU 48(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 96(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 48(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 112(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 44(SP) |
| MOVOU 64(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 128(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 64(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 144(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 60(SP) |
| MOVOU 80(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 160(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 80(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 176(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 76(SP) |
| MOVOU 96(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 192(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 96(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 208(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 92(SP) |
| MOVOU 112(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| MOVOU 224(DI), X12 |
| MOVOU X12, X13 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 112(DX), X11 |
| PSHUFB X15, X11 |
| PCLMULQDQ $0x00, X11, X12 |
| PXOR X12, X8 |
| PSHUFD $0x4e, X11, X12 |
| PCLMULQDQ $0x11, X11, X13 |
| PXOR X12, X11 |
| PXOR X13, X9 |
| MOVOU 240(DI), X13 |
| PCLMULQDQ $0x00, X13, X11 |
| PXOR X11, X10 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 108(SP) |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 124(SP) |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| MOVOU 160(AX), X11 |
| CMPQ R13, $0x0c |
| JB decLast1 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 192(AX), X11 |
| JE decLast1 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| AESENC X11, X1 |
| AESENC X11, X2 |
| AESENC X11, X3 |
| AESENC X11, X4 |
| AESENC X11, X5 |
| AESENC X11, X6 |
| AESENC X11, X7 |
| MOVOU 224(AX), X11 |
| |
| decLast1: |
| AESENCLAST X11, X0 |
| AESENCLAST X11, X1 |
| AESENCLAST X11, X2 |
| AESENCLAST X11, X3 |
| AESENCLAST X11, X4 |
| AESENCLAST X11, X5 |
| AESENCLAST X11, X6 |
| AESENCLAST X11, X7 |
| MOVOU (DX), X11 |
| PXOR X11, X0 |
| MOVOU 16(DX), X11 |
| PXOR X11, X1 |
| MOVOU 32(DX), X11 |
| PXOR X11, X2 |
| MOVOU 48(DX), X11 |
| PXOR X11, X3 |
| MOVOU 64(DX), X11 |
| PXOR X11, X4 |
| MOVOU 80(DX), X11 |
| PXOR X11, X5 |
| MOVOU 96(DX), X11 |
| PXOR X11, X6 |
| MOVOU 112(DX), X11 |
| PXOR X11, X7 |
| MOVOU X0, (SI) |
| MOVOU X1, 16(SI) |
| MOVOU X2, 32(SI) |
| MOVOU X3, 48(SI) |
| MOVOU X4, 64(SI) |
| MOVOU X5, 80(SI) |
| MOVOU X6, 96(SI) |
| MOVOU X7, 112(SI) |
| LEAQ 128(SI), SI |
| LEAQ 128(DX), DX |
| JMP gcmAesDecOctetsLoop |
| |
| gcmAesDecEndOctets: |
| SUBQ $0x07, R10 |
| |
| gcmAesDecSingles: |
| MOVOU 16(AX), X1 |
| MOVOU 32(AX), X2 |
| MOVOU 48(AX), X3 |
| MOVOU 64(AX), X4 |
| MOVOU 80(AX), X5 |
| MOVOU 96(AX), X6 |
| MOVOU 112(AX), X7 |
| MOVOU 224(DI), X13 |
| |
| gcmAesDecSinglesLoop: |
| CMPQ R9, $0x10 |
| JB gcmAesDecTail |
| SUBQ $0x10, R9 |
| MOVOU (DX), X0 |
| MOVOU X0, X12 |
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU X13, X8 |
| MOVOU X13, X9 |
| MOVOU 240(DI), X10 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X11, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| MOVOU (SP), X0 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 12(SP) |
| AESENC X1, X0 |
| AESENC X2, X0 |
| AESENC X3, X0 |
| AESENC X4, X0 |
| AESENC X5, X0 |
| AESENC X6, X0 |
| AESENC X7, X0 |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| MOVOU 160(AX), X11 |
| CMPQ R13, $0x0c |
| JB decLast2 |
| AESENC X11, X0 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| MOVOU 192(AX), X11 |
| JE decLast2 |
| AESENC X11, X0 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| MOVOU 224(AX), X11 |
| |
| decLast2: |
| AESENCLAST X11, X0 |
| PXOR X12, X0 |
| MOVOU X0, (SI) |
| LEAQ 16(SI), SI |
| LEAQ 16(DX), DX |
| JMP gcmAesDecSinglesLoop |
| |
| gcmAesDecTail: |
| TESTQ R9, R9 |
| JE gcmAesDecDone |
| MOVQ R9, R11 |
| SHLQ $0x04, R11 |
| LEAQ andMask<>+0(SB), R10 |
| MOVOU -16(R10)(R11*1), X12 |
| MOVOU (DX), X0 |
| PAND X12, X0 |
| MOVOU X0, X12 |
| PSHUFB X15, X0 |
| PXOR X8, X0 |
| MOVOU 224(DI), X8 |
| MOVOU 240(DI), X10 |
| MOVOU X8, X9 |
| PCLMULQDQ $0x00, X0, X8 |
| PCLMULQDQ $0x11, X0, X9 |
| PSHUFD $0x4e, X0, X11 |
| PXOR X0, X11 |
| PCLMULQDQ $0x00, X11, X10 |
| PXOR X8, X10 |
| PXOR X9, X10 |
| MOVOU X10, X11 |
| PSRLDQ $0x08, X10 |
| PSLLDQ $0x08, X11 |
| PXOR X10, X9 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| MOVOU X14, X11 |
| PCLMULQDQ $0x01, X8, X11 |
| PSHUFD $0x4e, X8, X8 |
| PXOR X11, X8 |
| PXOR X9, X8 |
| MOVOU (SP), X0 |
| ADDL $0x01, R10 |
| MOVL R10, R11 |
| XORL R12, R11 |
| BSWAPL R11 |
| MOVL R11, 12(SP) |
| AESENC X1, X0 |
| AESENC X2, X0 |
| AESENC X3, X0 |
| AESENC X4, X0 |
| AESENC X5, X0 |
| AESENC X6, X0 |
| AESENC X7, X0 |
| MOVOU 128(AX), X11 |
| AESENC X11, X0 |
| MOVOU 144(AX), X11 |
| AESENC X11, X0 |
| MOVOU 160(AX), X11 |
| CMPQ R13, $0x0c |
| JB decLast3 |
| AESENC X11, X0 |
| MOVOU 176(AX), X11 |
| AESENC X11, X0 |
| MOVOU 192(AX), X11 |
| JE decLast3 |
| AESENC X11, X0 |
| MOVOU 208(AX), X11 |
| AESENC X11, X0 |
| MOVOU 224(AX), X11 |
| |
| decLast3: |
| AESENCLAST X11, X0 |
| PXOR X12, X0 |
| |
| ptxStoreLoop: |
| PEXTRB $0x00, X0, (SI) |
| PSRLDQ $0x01, X0 |
| LEAQ 1(SI), SI |
| DECQ R9 |
| JNE ptxStoreLoop |
| |
| gcmAesDecDone: |
| MOVOU X8, (R8) |
| RET |