| // Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT. |
| |
| //go:build gc && !purego |
| |
// update folds msg into the three-word accumulator stored at the start of
// *state: every full 16-byte block, and then any 1..15 trailing bytes, is added
// to the accumulator, which is then multiplied by the two-word value stored at
// state+24/state+32 and folded back down (see multiply).
//
// Register use, as established by the loads and stores below:
//   DI          state pointer
//   SI          read cursor into msg
//   R15         number of msg bytes still to be consumed
//   R8:R9:R10   accumulator, least-significant word first (state+0, +8, +16);
//               loaded on entry, written back at done
//   R11, R12    multiplier words (state+24, state+32); read but never stored
//   AX, BX, CX, DX, R13, R14   scratch
//
// Frame $0-32: no local stack frame, 32 bytes of arguments (an 8-byte pointer
// followed by the 24-byte slice header; only its base and len are read).
//
// NOTE(review): macState's field layout is not visible in this file. Given the
// poly1305 package name in the generator command, the first three words are
// presumably the Poly1305 accumulator h and the next two the key half r —
// confirm against the Go declaration of macState.
| // func update(state *macState, msg []byte) |
| TEXT ·update(SB), $0-32 |
| MOVQ state+0(FP), DI |
| MOVQ msg_base+8(FP), SI |
| MOVQ msg_len+16(FP), R15 |
| MOVQ (DI), R8 |
| MOVQ 8(DI), R9 |
| MOVQ 16(DI), R10 |
| MOVQ 24(DI), R11 |
| MOVQ 32(DI), R12 |
// Fewer than 16 bytes in total: skip the full-block loop and go to the tail.
| CMPQ R15, $0x10 |
| JB bytes_between_0_and_15 |
| |
// loop: consume one full 16-byte block. The block is read as two 64-bit
// little-endian words and added to R8:R9; the ADCQ $0x01 adds the carry plus 1
// to the third word, i.e. the block is added with an extra bit set at position
// 128. SI then advances past the block.
| loop: |
| ADDQ (SI), R8 |
| ADCQ 8(SI), R9 |
| ADCQ $0x01, R10 |
| LEAQ 16(SI), SI |
| |
// multiply: (R8, R9, R10) * (R11, R12), built from word-sized partial products
// into BX:CX:R13:R14 (least-significant word first):
//   word 0  BX  = lo(R11*R8)
//   word 1  CX  = hi(R11*R8) + lo(R11*R9) + lo(R12*R8)
//   word 2  R13 = R11*R10 + lo(R12*R9) + carried-out high halves from word 1
//   word 3  R14 = R12*R10 + hi(R12*R9) + carries
// The two products involving R10 use IMULQ and keep only the low 64 bits.
// NOTE(review): that, and the carry-less ADDQ DX, R13, presumably rely on R10
// and the multiplier words being small enough not to overflow — the bounds are
// not established anywhere in this file; confirm against the caller.
// R8 is reused below as a temporary for hi(R12*R8) once its old value has been
// consumed by the second MULQ R8.
| multiply: |
| MOVQ R11, AX |
| MULQ R8 |
| MOVQ AX, BX |
| MOVQ DX, CX |
| MOVQ R11, AX |
| MULQ R9 |
| ADDQ AX, CX |
| ADCQ $0x00, DX |
| MOVQ R11, R13 |
| IMULQ R10, R13 |
| ADDQ DX, R13 |
| MOVQ R12, AX |
| MULQ R8 |
| ADDQ AX, CX |
| ADCQ $0x00, DX |
| MOVQ DX, R8 |
| MOVQ R12, R14 |
| IMULQ R10, R14 |
| MOVQ R12, AX |
| MULQ R9 |
| ADDQ AX, R13 |
| ADCQ DX, R14 |
| ADDQ R8, R13 |
| ADCQ $0x00, R14 |
// Fold the four-word product back into the three-word accumulator. Let c be
// the part of the product at bit 130 and above (bits 2..63 of R13 together
// with R14). The new accumulator is
//   low 130 bits  (BX, CX, R13 & 3)
//   + 4*c         (R13 with its low two bits cleared, and R14, added in place)
//   + c           (R14:R13 shifted right by two; the three-operand SHRQ shifts
//                  R13 and fills its top bits from R14)
// i.e. low + 5*c, which is the folding step for arithmetic modulo 2^130 - 5.
// Carries out of the second word accumulate in R10.
| MOVQ BX, R8 |
| MOVQ CX, R9 |
| MOVQ R13, R10 |
| ANDQ $0x03, R10 |
| MOVQ R13, BX |
| ANDQ $-4, BX |
| ADDQ BX, R8 |
| ADCQ R14, R9 |
| ADCQ $0x00, R10 |
| SHRQ $0x02, R14, R13 |
| SHRQ $0x02, R14 |
| ADDQ R13, R8 |
| ADCQ R14, R9 |
| ADCQ $0x00, R10 |
// One block consumed; keep looping while at least 16 bytes remain.
| SUBQ $0x10, R15 |
| CMPQ R15, $0x10 |
| JAE loop |
| |
// Tail: R15 is now below 16. Nothing left means we are finished; otherwise
// build the final partial block in CX:BX. BX starts as 1 so that, after the
// bytes are shifted in beneath it, a single 0x01 byte sits immediately above
// the last message byte. SI is moved to one past the last remaining byte
// because the bytes are consumed back to front. R13 is cleared so that the
// MOVB below, which writes only its low byte, leaves a zero-extended value.
| bytes_between_0_and_15: |
| TESTQ R15, R15 |
| JZ done |
| MOVQ $0x00000001, BX |
| XORQ CX, CX |
| XORQ R13, R13 |
| ADDQ R15, SI |
| |
// flush_buffer: per remaining byte, shift the 128-bit value CX:BX left by eight
// bits (the three-operand SHLQ shifts CX and fills it from the top of BX), then
// insert the next byte, walking backwards, into the now-zero low byte of BX.
// DECQ R15 sets the zero flag that JNZ tests.
| flush_buffer: |
| SHLQ $0x08, BX, CX |
| SHLQ $0x08, BX |
| MOVB -1(SI), R13 |
| XORQ R13, BX |
| DECQ SI |
| DECQ R15 |
| JNZ flush_buffer |
// Add the padded partial block to the accumulator. Unlike the full-block path,
// only the carry (ADCQ $0x00) reaches the third word: the marker byte is
// already inside CX:BX. R15 is set to 16 so that the SUBQ/CMPQ after multiply
// leaves it at 0, falls through to the TESTQ above and exits via done.
| ADDQ BX, R8 |
| ADCQ CX, R9 |
| ADCQ $0x00, R10 |
| MOVQ $0x00000010, R15 |
| JMP multiply |
| |
// done: write the three accumulator words back to state+0, +8 and +16. The
// multiplier words at state+24 and state+32 are left untouched.
| done: |
| MOVQ R8, (DI) |
| MOVQ R9, 8(DI) |
| MOVQ R10, 16(DI) |
| RET |