| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Based on CRYPTOGAMS code with the following comment: |
| // # ==================================================================== |
| // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
| // # project. The module is, however, dual licensed under OpenSSL and |
| // # CRYPTOGAMS licenses depending on where you obtain it. For further |
| // # details see http://www.openssl.org/~appro/cryptogams/. |
| // # ==================================================================== |
| |
| // Original code can be found at the link below: |
| // https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl |
| |
| // Some function names were changed to be consistent with Go function |
| // names. For instance, functions aes_p8_set_{en,de}crypt_key became |
| // set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts |
| // and a new section was created (doEncryptKeyAsm). This was necessary to |
| // avoid overwriting arguments when setDecryptKeyAsm calls setEncryptKeyAsm. |
| // There were other modifications as well, but the functionality was kept the same. |
| |
| #include "textflag.h" |
| |
| // Register aliases for expandKeyAsm |
| #define INP R3 |
| #define BITS R4 |
| #define OUTENC R5 // Pointer to next expanded encrypt key (advanced by 16 after each stored key) |
| #define PTR R6 |
| #define CNT R7 |
| #define ROUNDS R8 |
| #define OUTDEC R9 // Pointer to next expanded decrypt key (moved back by 16 after each stored key) |
| #define TEMP R19 |
| #define ZERO V0 |
| #define IN0 V1 |
| #define IN1 V2 |
| #define KEY V3 |
| #define RCON V4 |
| #define MASK V5 |
| #define TMP V6 |
| #define STAGE V7 |
| #define OUTPERM V8 |
| #define OUTMASK V9 |
| #define OUTHEAD V10 |
| #define OUTTAIL V11 |
| |
| // For P9 instruction emulation (registers used implicitly by the P8_* and LXSDX_BE macros) |
| #define ESPERM V21 // Endian swapping permute into BE; expandKeyAsm loads it from the first 16 bytes of ·rcon |
| #define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV (clobbered by every use of those macros) |
| |
| // Register aliases for {en,de}cryptBlockAsm |
| #define BLK_INP R3 |
| #define BLK_OUT R4 |
| #define BLK_KEY R5 |
| #define BLK_ROUNDS R6 |
| #define BLK_IDX R7 |
| |
| DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap (loaded into ESPERM by expandKeyAsm) |
| DATA ·rcon+0x08(SB)/8, $0x0706050403020100 |
| DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON: 0x01000000 in every word; first value loaded into the RCON register |
| DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON (second doubleword of the same vector) |
| DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000 |
| DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000 |
| DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK: VPERM byte selector used for the rotate-n-splat step |
| DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK (second doubleword of the same vector) |
| DATA ·rcon+0x40(SB)/8, $0x0000000000000000 |
| DATA ·rcon+0x48(SB)/8, $0x0000000000000000 |
| GLOBL ·rcon(SB), RODATA, $80 |
| |
| // Emulate unaligned BE vector load/stores on LE targets. Every macro addresses memory at RA+RB. The macros that use VPERM require ESPERM to already hold the endian-swap permute, and both store macros overwrite TMP2. |
| #define P8_LXVB16X(RA,RB,VT) \ |
| LXVD2X (RA+RB), VT \ |
| VPERM VT, VT, ESPERM, VT |
| |
| #define P8_STXVB16X(VS,RA,RB) \ |
| VPERM VS, VS, ESPERM, TMP2 \ |
| STXVD2X TMP2, (RA+RB) |
| |
| #define P8_STXV(VS,RA,RB) \ |
| XXPERMDI VS, VS, $2, TMP2 \ |
| STXVD2X TMP2, (RA+RB) |
| |
| #define P8_LXV(RA,RB,VT) \ |
| LXVD2X (RA+RB), VT \ |
| XXPERMDI VT, VT, $2, VT |
| |
| #define LXSDX_BE(RA,RB,VT) \ |
| LXSDX (RA+RB), VT \ |
| VPERM VT, VT, ESPERM, VT |
| |
| // func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32) |
| TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 |
| // Expands the AES key at key into 16-byte round keys, written in ascending order at enc and in descending order at dec. nr < 12 takes the 128-bit path, nr == 12 the 192-bit path, anything larger the 256-bit path. Load the arguments into registers first. |
| MOVD nr+0(FP), ROUNDS |
| MOVD key+8(FP), INP |
| MOVD enc+16(FP), OUTENC |
| MOVD dec+24(FP), OUTDEC |
| |
| MOVD $·rcon(SB), PTR // PTR points to ·rcon; its first 16 bytes are the ESPERM permute |
| LVX (PTR), ESPERM |
| ADD $0x10, PTR |
| |
| // Load the first 16 key bytes into IN0 through the P8_LXVB16X emulation (needs ESPERM, set just above) |
| P8_LXVB16X(INP, R0, IN0) |
| ADD $0x10, INP, INP |
| MOVD $0x20, TEMP |
| |
| CMPW ROUNDS, $12 |
| LVX (PTR)(R0), RCON // lvx 4,0,6 RCON = vector at ·rcon+0x10 (0x01000000 per word) |
| LVX (PTR)(TEMP), MASK |
| ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON (·rcon+0x20) |
| MOVD $8, CNT // li 7,8 CNT = 8 |
| VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :) |
| MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds) |
| |
| // The expanded decrypt key is the expanded encrypt key stored in reverse order. |
| // Move OUTDEC to the last key location (160, 192 or 224 bytes in), and store in descending order. BLT/BEQ below still test the CMPW ROUNDS, $12 result; the plain ADDs in between do not alter it. |
| ADD $160, OUTDEC, OUTDEC |
| BLT loop128 |
| ADD $32, OUTDEC, OUTDEC |
| BEQ l192 |
| ADD $32, OUTDEC, OUTDEC |
| JMP l256 |
| |
| loop128: |
| // Key schedule (Round 1 to 8): CTR = 8; each pass stores the current round key to both outputs and derives the next one in IN0 |
| VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 double the round constant for the next pass |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| BC 0x10, 0, loop128 // bdnz .Loop128 |
| |
| LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys: reload RCON from ·rcon+0x20 (0x1b000000 per word) |
| |
| // Key schedule (Round 9) |
| VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| // Key schedule (Round 10) |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| |
| VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| // Key schedule (Round 11): derive and store the final (11th) round key, then return |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| |
| RET |
| |
| l192: |
| LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order. |
| MOVD $4, CNT // li 7,4 four passes of loop192, each storing three round keys |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| VSPLTISB $8, KEY // vspltisb 3,8 |
| MOVD CNT, CTR // mtctr 7 |
| VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 subtract 8 from every MASK byte; loop192 applies the adjusted MASK to IN1 |
| |
| loop192: |
| VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| |
| VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8 |
| VSPLTW $3, IN0, TMP // vspltw 6,1,3 |
| VXOR TMP, IN1, TMP // vxor 6,6,2 |
| VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 |
| VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 |
| VXOR IN1, TMP, IN1 // vxor 2,2,6 |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| VXOR IN1, KEY, IN1 // vxor 2,2,3 |
| VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8 STAGE assembles a 16-byte round key from the 24-byte key state held in IN0/IN1 |
| |
| VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| P8_STXV(STAGE, R0, OUTENC) |
| P8_STXV(STAGE, R0, OUTDEC) |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| P8_STXV(STAGE, R0, OUTENC) |
| P8_STXV(STAGE, R0, OUTDEC) |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| VSPLTW $3, IN0, TMP // vspltw 6,1,3 |
| VXOR TMP, IN1, TMP // vxor 6,6,2 |
| VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 |
| VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 |
| VXOR IN1, TMP, IN1 // vxor 2,2,6 |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| VXOR IN1, KEY, IN1 // vxor 2,2,3 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| BC 0x10, 0, loop192 // bdnz .Loop192 |
| |
| RET |
| |
| l256: |
| P8_LXVB16X(INP, R0, IN1) |
| MOVD $7, CNT // li 7,7 seven passes of loop256, each storing two round keys |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| MOVD CNT, CTR // mtctr 7 |
| |
| loop256: |
| VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 |
| VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 |
| P8_STXV(IN1, R0, OUTENC) |
| P8_STXV(IN1, R0, OUTDEC) |
| VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN0, TMP, IN0 // vxor 1,1,6 |
| VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 |
| VXOR IN0, KEY, IN0 // vxor 1,1,3 |
| P8_STXV(IN0, R0, OUTENC) |
| P8_STXV(IN0, R0, OUTDEC) |
| ADD $16, OUTENC, OUTENC |
| ADD $-16, OUTDEC, OUTDEC |
| BC 0x12, 0, done // bdz .Ldone leave once CTR reaches zero, i.e. right after the last round key was stored |
| |
| VSPLTW $3, IN0, KEY // vspltw 3,1,3 |
| VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12 |
| VSBOX KEY, KEY // vsbox 3,3 S-box substitution only; this half-step uses neither MASK nor RCON |
| |
| VXOR IN1, TMP, IN1 // vxor 2,2,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN1, TMP, IN1 // vxor 2,2,6 |
| VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 |
| VXOR IN1, TMP, IN1 // vxor 2,2,6 |
| |
| VXOR IN1, KEY, IN1 // vxor 2,2,3 |
| JMP loop256 // b .Loop256 |
| |
| done: |
| RET |
| |
| // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte) |
| TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 |
| // Encrypts the 16 bytes at src into dst with the expanded key at xk, running nr rounds. V0 (named ZERO) carries the cipher state here. Load the arguments into registers first. |
| MOVD nr+0(FP), BLK_ROUNDS |
| MOVD xk+8(FP), BLK_KEY |
| MOVD dst+16(FP), BLK_OUT |
| MOVD src+24(FP), BLK_INP |
| |
| MOVD $15, BLK_IDX // li 7,15 offset of the last source byte |
| |
| LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 aligned quadword containing src[0] |
| NEG BLK_OUT, R11 // neg 11,4 |
| LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 aligned quadword containing src[15] |
| LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 alignment permute for src |
| VSPLTISB $0x0f, RCON // vspltisb 4,0x0f |
| LVSR (R11)(R0), KEY // lvsr 3,0,11 alignment permute for dst, derived from -dst |
| VXOR IN1, RCON, IN1 // vxor 2,2,4 xor every permute index with 0x0f |
| MOVD $16, BLK_IDX // li 7,16 |
| VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 extract the 16 source bytes from the two quadwords |
| LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 |
| LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 alignment permute for the round keys |
| SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 the loop below performs two rounds per pass |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 the final two rounds run after the loop |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 aligned round key 0 |
| |
| VXOR ZERO, IN0, ZERO // vxor 0,0,1 xor the input with round key 0 |
| LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| MOVD BLK_ROUNDS, CTR // mtctr 6 CTR = nr/2 - 1 |
| |
| loop_enc: |
| VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 |
| VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 |
| VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1 |
| LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| BC 0x10, 0, loop_enc // bdnz .Loop_enc |
| |
| VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 |
| VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 |
| VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1 final round; ZERO now holds the ciphertext block |
| |
| VSPLTISB $-1, IN1 // vspltisb 2,-1 all-ones, turned into the store mask below |
| VXOR IN0, IN0, IN0 // vxor 1,1,1 |
| MOVD $15, BLK_IDX // li 7,15 |
| VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 select mask separating result bytes from bytes already in memory |
| VXOR KEY, RCON, KEY // vxor 3,3,4 |
| LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 current contents of the quadword containing dst[0] |
| VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 rotate the result to dst's alignment |
| VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 merge the result into the first quadword |
| LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 current contents of the quadword containing dst[15] |
| STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 |
| VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 merge the result into the second quadword |
| STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 |
| |
| RET // blr |
| |
| // func decryptBlockAsm(nr int, xk *uint32, dst, src *byte) |
| TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 |
| // Decrypts the 16 bytes at src into dst with the expanded key at xk, running nr rounds. V0 (named ZERO) carries the cipher state here. Load the arguments into registers first. |
| MOVD nr+0(FP), BLK_ROUNDS |
| MOVD xk+8(FP), BLK_KEY |
| MOVD dst+16(FP), BLK_OUT |
| MOVD src+24(FP), BLK_INP |
| |
| MOVD $15, BLK_IDX // li 7,15 offset of the last source byte |
| |
| LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 aligned quadword containing src[0] |
| NEG BLK_OUT, R11 // neg 11,4 |
| LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 aligned quadword containing src[15] |
| LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 alignment permute for src |
| VSPLTISB $0x0f, RCON // vspltisb 4,0x0f |
| LVSR (R11)(R0), KEY // lvsr 3,0,11 alignment permute for dst, derived from -dst |
| VXOR IN1, RCON, IN1 // vxor 2,2,4 xor every permute index with 0x0f |
| MOVD $16, BLK_IDX // li 7,16 |
| VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 extract the 16 source bytes from the two quadwords |
| LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 |
| LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 alignment permute for the round keys |
| SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 the loop below performs two rounds per pass |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 the final two rounds run after the loop |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 aligned round key 0 |
| |
| VXOR ZERO, IN0, ZERO // vxor 0,0,1 xor the input with round key 0 |
| LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| MOVD BLK_ROUNDS, CTR // mtctr 6 CTR = nr/2 - 1 |
| |
| loop_dec: |
| VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 |
| VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 |
| VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1 |
| LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 |
| ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 |
| BC 0x10, 0, loop_dec // bdnz .Loop_dec |
| |
| VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 |
| VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 |
| LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 |
| VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 |
| VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1 final round; ZERO now holds the plaintext block |
| |
| VSPLTISB $-1, IN1 // vspltisb 2,-1 all-ones, turned into the store mask below |
| VXOR IN0, IN0, IN0 // vxor 1,1,1 |
| MOVD $15, BLK_IDX // li 7,15 |
| VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 select mask separating result bytes from bytes already in memory |
| VXOR KEY, RCON, KEY // vxor 3,3,4 |
| LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 current contents of the quadword containing dst[0] |
| VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 rotate the result to dst's alignment |
| VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 merge the result into the first quadword |
| LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 current contents of the quadword containing dst[15] |
| STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 |
| VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 merge the result into the second quadword |
| STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 |
| |
| RET // blr |
| |
| // Remove defines from above so the names cryptBlocksChain needs (INP, ROUNDS, KEY, TMP, OUTPERM, OUTMASK, OUTHEAD) can be redefined below with different registers |
| #undef INP |
| #undef OUTENC |
| #undef ROUNDS |
| #undef KEY |
| #undef TMP |
| #undef OUTPERM |
| #undef OUTMASK |
| #undef OUTHEAD |
| #undef OUTTAIL |
| |
| // CBC encrypt or decrypt |
| // R3 src |
| // R4 dst |
| // R5 len |
| // R6 key |
| // R7 iv |
| // R8 enc: zero selects decrypt, non-zero selects encrypt |
| // Ported from: aes_p8_cbc_encrypt |
| // Register usage: |
| // R9: ROUNDS |
| // R10: Index |
| // V0: RNDKEY0, initialized to 0 |
| // V3: TMP, initialized to 0x0f in every byte |
| // V4: IV |
| // V5: SRC (INPTAIL) |
| // V6: INPPERM, permute mask for IV and then for SRC |
| // V7: DST (OUTHEAD); V8: OUTPERM; V9: OUTMASK |
| // V10: KEY perm mask |
| |
| #define INP R3 |
| #define OUT R4 |
| #define LEN R5 |
| #define KEY R6 |
| #define IVP R7 |
| #define ENC R8 |
| #define ROUNDS R9 |
| #define IDX R10 |
| |
| #define RNDKEY0 V0 |
| #define RNDKEY1 V1 |
| #define INOUT V2 |
| #define TMP V3 |
| |
| #define IVEC V4 |
| #define INPTAIL V5 |
| #define INPPERM V6 |
| #define OUTHEAD V7 |
| #define OUTPERM V8 |
| #define OUTMASK V9 |
| #define KEYPERM V10 |
| |
| // Vector loads are done using LVX followed by |
| // a VPERM using mask generated from previous |
| // LVSL or LVSR instruction, to obtain the correct |
| // bytes if address is unaligned. |
| |
| // Encryption is done with VCIPHER and VCIPHERLAST |
| // Decryption is done with VNCIPHER and VNCIPHERLAST |
| |
| // Encryption and decryption are done as follows: |
| // - INOUT value is initialized in outer loop. |
| // - ROUNDS value is adjusted for loop unrolling. |
| // - Encryption/decryption is done in loop based on |
| // adjusted ROUNDS value. |
| // - Final INOUT value is encrypted/decrypted and stored. |
| |
| // Note: original implementation had an 8X version |
| // for decryption which was omitted to avoid the |
| // complexity. |
| |
| // func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int) |
| TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0 |
| MOVD src+0(FP), INP |
| MOVD dst+8(FP), OUT |
| MOVD length+16(FP), LEN |
| MOVD key+24(FP), KEY |
| MOVD iv+32(FP), IVP |
| MOVD enc+40(FP), ENC |
| MOVD nr+48(FP), ROUNDS |
| |
| CMPU LEN, $16 // cmpldi r5,16 |
| BC 14, 0, LR // bltlr- return immediately when length < 16 |
| CMPW ENC, $0 // cmpwi r8,0 result is consumed by BEQ Lcbc_dec below; nothing in between writes CR0 |
| MOVD $15, IDX // li r10,15 |
| VXOR RNDKEY0, RNDKEY0, RNDKEY0 // vxor v0,v0,v0 |
| VSPLTISB $0xf, TMP // vspltisb $0xf,v3 |
| |
| LVX (IVP)(R0), IVEC // lvx v4,r0,r7 aligned quadword containing iv[0] |
| LVSL (IVP)(R0), INPPERM // lvsl v6,r0,r7 |
| LVX (IVP)(IDX), INPTAIL // lvx v5,r10,r7 aligned quadword containing iv[15] |
| VXOR INPPERM, TMP, INPPERM // vxor v6,v6,v3 |
| VPERM IVEC, INPTAIL, INPPERM, IVEC // vperm v4,v4,v5,v6 IVEC = the 16 IV bytes |
| NEG INP, R11 // neg r11,r3 |
| LVSR (KEY)(R0), KEYPERM // lvsr v10,r0,r6 |
| LVSR (R11)(R0), V6 // lvsr v6,r0,r11 V6 is INPPERM, now the permute for src |
| LVX (INP)(R0), INPTAIL // lvx v5,r0,r3 |
| ADD $15, INP // addi r3,r3,15 |
| VXOR INPPERM, TMP, INPPERM // vxor v6,v6,v3 |
| LVSL (OUT)(R0), OUTPERM // lvsl v8,r0,r4 |
| VSPLTISB $-1, OUTMASK // vspltisb v9,-1 |
| LVX (OUT)(R0), OUTHEAD // lvx v7,r0,r4 |
| VPERM OUTMASK, RNDKEY0, OUTPERM, OUTMASK // vperm v9,v9,v0,v8 |
| VXOR OUTPERM, TMP, OUTPERM // vxor v8,v8,v3 |
| SRW $1, ROUNDS // rlwinm r9,r9,31,1,31 two rounds per inner-loop pass |
| |
| MOVD $16, IDX // li r10,16 |
| ADD $-1, ROUNDS // addi r9,r9,-1 the final two rounds run after the inner loop |
| BEQ Lcbc_dec // beq enc == 0 selects the decrypt path |
| PCALIGN $16 |
| |
| // Outer loop: initialize encrypted value (INOUT) |
| // Load input (INPTAIL) ivec (IVEC) |
| Lcbc_enc: |
| VOR INPTAIL, INPTAIL, INOUT // vor v2,v5,v5 |
| LVX (INP)(R0), INPTAIL // lvx v5,r0,r3 |
| ADD $16, INP // addi r3,r3,16 |
| MOVD ROUNDS, CTR // mtctr r9 |
| ADD $-16, LEN // addi r5,r5,-16 |
| LVX (KEY)(R0), RNDKEY0 // lvx v0,r0,r6 |
| VPERM INOUT, INPTAIL, INPPERM, INOUT // vperm v2,v2,v5,v6 |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VXOR INOUT, RNDKEY0, INOUT // vxor v2,v2,v0 |
| LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| VXOR INOUT, IVEC, INOUT // vxor v2,v2,v4 chain in IVEC (the IV, or the previous output block) |
| |
| // Encryption loop of INOUT using RNDKEY0 and RNDKEY1 |
| Loop_cbc_enc: |
| VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10 |
| VCIPHER INOUT, RNDKEY1, INOUT // vcipher v2,v2,v1 |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VCIPHER INOUT, RNDKEY0, INOUT // vcipher v2,v2,v0 |
| LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| BC 16, 0, Loop_cbc_enc // bdnz Loop_cbc_enc |
| |
| // Encrypt tail values and store INOUT |
| VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10 |
| VCIPHER INOUT, RNDKEY1, INOUT // vcipher v2,v2,v1 |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| MOVD $16, IDX // li r10,16 reset the key index for the next block |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VCIPHERLAST INOUT, RNDKEY0, IVEC // vcipherlast v4,v2,v0 the output block is written to IVEC, so it chains into the next block |
| CMPU LEN, $16 // cmpldi r5,16 is another full block left? |
| VPERM IVEC, IVEC, OUTPERM, TMP // vperm v3,v4,v4,v8 |
| VSEL OUTHEAD, TMP, OUTMASK, INOUT // vsel v2,v7,v3,v9 |
| VOR TMP, TMP, OUTHEAD // vor v7,v3,v3 |
| STVX INOUT, (OUT)(R0) // stvx v2,r0,r4 |
| ADD $16, OUT // addi r4,r4,16 |
| BGE Lcbc_enc // bge Lcbc_enc |
| BR Lcbc_done // b Lcbc_done |
| |
| // Outer loop: initialize decrypted value (INOUT) |
| // Load input (INPTAIL) ivec (IVEC) |
| Lcbc_dec: |
| VOR INPTAIL, INPTAIL, TMP // vor v3,v5,v5 |
| LVX (INP)(R0), INPTAIL // lvx v5,r0,r3 |
| ADD $16, INP // addi r3,r3,16 |
| MOVD ROUNDS, CTR // mtctr r9 |
| ADD $-16, LEN // addi r5,r5,-16 |
| LVX (KEY)(R0), RNDKEY0 // lvx v0,r0,r6 |
| VPERM TMP, INPTAIL, INPPERM, TMP // vperm v3,v3,v5,v6 TMP keeps the input block until it becomes the next IVEC |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VXOR TMP, RNDKEY0, INOUT // vxor v2,v3,v0 |
| LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| PCALIGN $16 |
| |
| // Decryption loop of INOUT using RNDKEY0 and RNDKEY1 |
| Loop_cbc_dec: |
| VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10 |
| VNCIPHER INOUT, RNDKEY1, INOUT // vncipher v2,v2,v1 |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VNCIPHER INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0 |
| LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6 |
| ADD $16, IDX // addi r10,r10,16 |
| BC 16, 0, Loop_cbc_dec // bdnz |
| |
| // Decrypt tail values and store INOUT |
| VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10 |
| VNCIPHER INOUT, RNDKEY1, INOUT // vncipher v2,v2,v1 |
| LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6 |
| MOVD $16, IDX // li r10,16 reset the key index for the next block |
| VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10 |
| VNCIPHERLAST INOUT, RNDKEY0, INOUT // vncipherlast v2,v2,v0 |
| CMPU LEN, $16 // cmpldi r5,16 is another full block left? |
| VXOR INOUT, IVEC, INOUT // vxor v2,v2,v4 chain in IVEC (the IV, or the previous input block) |
| VOR TMP, TMP, IVEC // vor v4,v3,v3 this input block becomes IVEC for the next block |
| VPERM INOUT, INOUT, OUTPERM, TMP // vperm v3,v2,v2,v8 |
| VSEL OUTHEAD, TMP, OUTMASK, INOUT // vsel v2,v7,v3,v9 |
| VOR TMP, TMP, OUTHEAD // vor v7,v3,v3 |
| STVX INOUT, (OUT)(R0) // stvx v2,r0,r4 |
| ADD $16, OUT // addi r4,r4,16 |
| BGE Lcbc_dec // bge |
| |
| Lcbc_done: |
| ADD $-1, OUT // addi r4,r4,-1 step back to the last output byte |
| LVX (OUT)(R0), INOUT // lvx v2,r0,r4 |
| VSEL OUTHEAD, INOUT, OUTMASK, INOUT // vsel v2,v7,v2,v9 merge the pending OUTHEAD bytes with what is already in memory |
| STVX INOUT, (OUT)(R0) // stvx v2,r0,r4 |
| NEG IVP, ENC // neg r8,r7 ENC is reused as scratch: -iv feeds the store permute |
| MOVD $15, IDX // li r10,15 |
| VXOR RNDKEY0, RNDKEY0, RNDKEY0 // vxor v0,v0,v0 |
| VSPLTISB $-1, OUTMASK // vspltisb v9,-1 |
| VSPLTISB $0xf, TMP // vspltisb v3, 0xf |
| LVSR (ENC)(R0), OUTPERM // lvsr v8,r0,r8 |
| VPERM OUTMASK, RNDKEY0, OUTPERM, OUTMASK // vperm v9,v9,v0,v8 |
| VXOR OUTPERM, TMP, OUTPERM // vxor v8,v8,v3 |
| LVX (IVP)(R0), OUTHEAD // lvx v7,r0,r7 |
| VPERM IVEC, IVEC, OUTPERM, IVEC // vperm v4,v4,v4,v8 |
| VSEL OUTHEAD, IVEC, OUTMASK, INOUT // vsel v2,v7,v4,v9 |
| LVX (IVP)(IDX), INPTAIL // lvx v5,r10,r7 |
| STVX INOUT, (IVP)(R0) // stvx v2,r0,r7 write the updated IVEC back to iv (first quadword) |
| VSEL IVEC, INPTAIL, OUTMASK, INOUT // vsel v2,v4,v5,v9 |
| STVX INOUT, (IVP)(IDX) // stvx v2,r10,r7 write the updated IVEC back to iv (quadword containing iv[15]) |
| RET // bclr 20,lt,0 |
| |