| // Code generated by command: go run ctr_amd64_asm.go -out ../../ctr_amd64.s. DO NOT EDIT. |
| |
| //go:build !purego |
| |
| #include "textflag.h" |
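// Each ctrBlocksNAsm routine expands the 128-bit counter passed in (ivhi, ivlo)
// into N consecutive big-endian CTR blocks, encrypts them with the expanded key
// schedule xk (nr = 10, 12, or 14 rounds), and XORs the keystream with src into dst.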
| |
| // func ctrBlocks1Asm(nr int, xk *[60]uint32, dst *[16]byte, src *[16]byte, ivlo uint64, ivhi uint64) |
| // Requires: AES, SSE, SSE2, SSE4.1, SSSE3 |
| TEXT ·ctrBlocks1Asm(SB), $0-48 |
| MOVQ nr+0(FP), AX |
| MOVQ xk+8(FP), CX |
| MOVQ dst+16(FP), DX |
| MOVQ src+24(FP), BX |
| MOVQ ivlo+32(FP), SI |
| MOVQ ivhi+40(FP), DI |
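	// Build the counter block in X1 and byte-swap it into big-endian form.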
| MOVOU bswapMask<>+0(SB), X0 |
| MOVQ SI, X1 |
| PINSRQ $0x01, DI, X1 |
| PSHUFB X0, X1 |
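	// Whitening: XOR in round key 0 and advance the key schedule pointer.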
| MOVUPS (CX), X0 |
| PXOR X0, X1 |
| ADDQ $0x10, CX |
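	// nr-12 is negative for AES-128 (10 rounds), zero for AES-192 (12 rounds),
	// and positive for AES-256 (14 rounds), which runs two extra rounds below
	// before falling through to enc192.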
| SUBQ $0x0c, AX |
| JE enc192 |
| JB enc128 |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| ADDQ $0x20, CX |
| |
| enc192: |
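	// Two rounds shared by AES-192 and AES-256.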
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| ADDQ $0x20, CX |
| |
| enc128: |
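	// Final nine full rounds plus AESENCLAST, shared by every key size.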
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 32(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 48(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 64(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 80(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 96(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 112(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 128(CX), X0 |
| AESENC X0, X1 |
| MOVUPS 144(CX), X0 |
| AESENCLAST X0, X1 |
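	// XOR the keystream with the source block and store the result in dst.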
| MOVUPS (BX), X0 |
| PXOR X1, X0 |
| MOVUPS X0, (DX) |
| RET |
| |
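// bswapMask is a PSHUFB mask that reverses the byte order of a 128-bit value,
// converting the little-endian counter into the big-endian block CTR encrypts.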
| DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f |
| DATA bswapMask<>+8(SB)/8, $0x0001020304050607 |
| GLOBL bswapMask<>(SB), RODATA|NOPTR, $16 |
| |
| // func ctrBlocks2Asm(nr int, xk *[60]uint32, dst *[32]byte, src *[32]byte, ivlo uint64, ivhi uint64) |
| // Requires: AES, SSE, SSE2, SSE4.1, SSSE3 |
| TEXT ·ctrBlocks2Asm(SB), $0-48 |
| MOVQ nr+0(FP), AX |
| MOVQ xk+8(FP), CX |
| MOVQ dst+16(FP), DX |
| MOVQ src+24(FP), BX |
| MOVQ ivlo+32(FP), SI |
| MOVQ ivhi+40(FP), DI |
| MOVOU bswapMask<>+0(SB), X0 |
| MOVQ SI, X1 |
| PINSRQ $0x01, DI, X1 |
| PSHUFB X0, X1 |
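	// Increment the 128-bit counter and build the second block.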
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X2 |
| PINSRQ $0x01, DI, X2 |
| PSHUFB X0, X2 |
| MOVUPS (CX), X0 |
| PXOR X0, X1 |
| PXOR X0, X2 |
| ADDQ $0x10, CX |
| SUBQ $0x0c, AX |
| JE enc192 |
| JB enc128 |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| ADDQ $0x20, CX |
| |
| enc192: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| ADDQ $0x20, CX |
| |
| enc128: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 32(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 48(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 64(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 80(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 96(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 112(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 128(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| MOVUPS 144(CX), X0 |
| AESENCLAST X0, X1 |
| AESENCLAST X0, X2 |
| MOVUPS (BX), X0 |
| PXOR X1, X0 |
| MOVUPS X0, (DX) |
| MOVUPS 16(BX), X0 |
| PXOR X2, X0 |
| MOVUPS X0, 16(DX) |
| RET |
| |
| // func ctrBlocks4Asm(nr int, xk *[60]uint32, dst *[64]byte, src *[64]byte, ivlo uint64, ivhi uint64) |
| // Requires: AES, SSE, SSE2, SSE4.1, SSSE3 |
| TEXT ·ctrBlocks4Asm(SB), $0-48 |
| MOVQ nr+0(FP), AX |
| MOVQ xk+8(FP), CX |
| MOVQ dst+16(FP), DX |
| MOVQ src+24(FP), BX |
| MOVQ ivlo+32(FP), SI |
| MOVQ ivhi+40(FP), DI |
| MOVOU bswapMask<>+0(SB), X0 |
| MOVQ SI, X1 |
| PINSRQ $0x01, DI, X1 |
| PSHUFB X0, X1 |
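	// Each following block uses the 128-bit counter incremented by one.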
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X2 |
| PINSRQ $0x01, DI, X2 |
| PSHUFB X0, X2 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X3 |
| PINSRQ $0x01, DI, X3 |
| PSHUFB X0, X3 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X4 |
| PINSRQ $0x01, DI, X4 |
| PSHUFB X0, X4 |
| MOVUPS (CX), X0 |
| PXOR X0, X1 |
| PXOR X0, X2 |
| PXOR X0, X3 |
| PXOR X0, X4 |
| ADDQ $0x10, CX |
| SUBQ $0x0c, AX |
| JE enc192 |
| JB enc128 |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| ADDQ $0x20, CX |
| |
| enc192: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| ADDQ $0x20, CX |
| |
| enc128: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 32(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 48(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 64(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 80(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 96(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 112(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 128(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| MOVUPS 144(CX), X0 |
| AESENCLAST X0, X1 |
| AESENCLAST X0, X2 |
| AESENCLAST X0, X3 |
| AESENCLAST X0, X4 |
| MOVUPS (BX), X0 |
| PXOR X1, X0 |
| MOVUPS X0, (DX) |
| MOVUPS 16(BX), X0 |
| PXOR X2, X0 |
| MOVUPS X0, 16(DX) |
| MOVUPS 32(BX), X0 |
| PXOR X3, X0 |
| MOVUPS X0, 32(DX) |
| MOVUPS 48(BX), X0 |
| PXOR X4, X0 |
| MOVUPS X0, 48(DX) |
| RET |
| |
| // func ctrBlocks8Asm(nr int, xk *[60]uint32, dst *[128]byte, src *[128]byte, ivlo uint64, ivhi uint64) |
| // Requires: AES, SSE, SSE2, SSE4.1, SSSE3 |
| TEXT ·ctrBlocks8Asm(SB), $0-48 |
| MOVQ nr+0(FP), AX |
| MOVQ xk+8(FP), CX |
| MOVQ dst+16(FP), DX |
| MOVQ src+24(FP), BX |
| MOVQ ivlo+32(FP), SI |
| MOVQ ivhi+40(FP), DI |
| MOVOU bswapMask<>+0(SB), X0 |
| MOVQ SI, X1 |
| PINSRQ $0x01, DI, X1 |
| MOVAPS X1, X8 |
| PSHUFB X0, X1 |
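	// X8 still holds the little-endian counter. Fast path: if adding 7 to its
	// low 64 bits cannot carry, the next seven counters can be formed with
	// single-lane PADDQ adds on the low quadword; otherwise take the slow path.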
| MOVQ SI, R8 |
| ADDQ $0x07, R8 |
| JC ctr8_slow |
	MOVQ $0x01, R8
| PXOR X9, X9 |
| PINSRQ $0x00, R8, X9 |
| PADDQ X9, X8 |
| MOVAPS X8, X2 |
| PADDQ X9, X8 |
| MOVAPS X8, X3 |
| PADDQ X9, X8 |
| MOVAPS X8, X4 |
| PADDQ X9, X8 |
| MOVAPS X8, X5 |
| PADDQ X9, X8 |
| MOVAPS X8, X6 |
| PADDQ X9, X8 |
| MOVAPS X8, X7 |
| PADDQ X9, X8 |
| JMP ctr8_done |
| |
| ctr8_slow: |
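	// Slow path: a carry into the high 64 bits is possible, so increment the
	// counter with a full 128-bit ADDQ/ADCQ for every block.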
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X2 |
| PINSRQ $0x01, DI, X2 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X3 |
| PINSRQ $0x01, DI, X3 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X4 |
| PINSRQ $0x01, DI, X4 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X5 |
| PINSRQ $0x01, DI, X5 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X6 |
| PINSRQ $0x01, DI, X6 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X7 |
| PINSRQ $0x01, DI, X7 |
| ADDQ $0x01, SI |
| ADCQ $0x00, DI |
| MOVQ SI, X8 |
| PINSRQ $0x01, DI, X8 |
| |
| ctr8_done: |
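	// Byte-swap the remaining counters into big-endian CTR blocks.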
| PSHUFB X0, X2 |
| PSHUFB X0, X3 |
| PSHUFB X0, X4 |
| PSHUFB X0, X5 |
| PSHUFB X0, X6 |
| PSHUFB X0, X7 |
| PSHUFB X0, X8 |
| MOVUPS (CX), X0 |
| PXOR X0, X1 |
| PXOR X0, X2 |
| PXOR X0, X3 |
| PXOR X0, X4 |
| PXOR X0, X5 |
| PXOR X0, X6 |
| PXOR X0, X7 |
| PXOR X0, X8 |
| ADDQ $0x10, CX |
| SUBQ $0x0c, AX |
| JE enc192 |
| JB enc128 |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| ADDQ $0x20, CX |
| |
| enc192: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| ADDQ $0x20, CX |
| |
| enc128: |
| MOVUPS (CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 16(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 32(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 48(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 64(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 80(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 96(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 112(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 128(CX), X0 |
| AESENC X0, X1 |
| AESENC X0, X2 |
| AESENC X0, X3 |
| AESENC X0, X4 |
| AESENC X0, X5 |
| AESENC X0, X6 |
| AESENC X0, X7 |
| AESENC X0, X8 |
| MOVUPS 144(CX), X0 |
| AESENCLAST X0, X1 |
| AESENCLAST X0, X2 |
| AESENCLAST X0, X3 |
| AESENCLAST X0, X4 |
| AESENCLAST X0, X5 |
| AESENCLAST X0, X6 |
| AESENCLAST X0, X7 |
| AESENCLAST X0, X8 |
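	// XOR each keystream block with the matching 16 bytes of src and store to dst.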
| MOVUPS (BX), X0 |
| PXOR X1, X0 |
| MOVUPS X0, (DX) |
| MOVUPS 16(BX), X0 |
| PXOR X2, X0 |
| MOVUPS X0, 16(DX) |
| MOVUPS 32(BX), X0 |
| PXOR X3, X0 |
| MOVUPS X0, 32(DX) |
| MOVUPS 48(BX), X0 |
| PXOR X4, X0 |
| MOVUPS X0, 48(DX) |
| MOVUPS 64(BX), X0 |
| PXOR X5, X0 |
| MOVUPS X0, 64(DX) |
| MOVUPS 80(BX), X0 |
| PXOR X6, X0 |
| MOVUPS X0, 80(DX) |
| MOVUPS 96(BX), X0 |
| PXOR X7, X0 |
| MOVUPS X0, 96(DX) |
| MOVUPS 112(BX), X0 |
| PXOR X8, X0 |
| MOVUPS X0, 112(DX) |
| RET |