| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ppc64 || ppc64le |
| |
| #include "textflag.h" |
| |
| // See memmove Go doc for important implementation constraints. |
| |
| // func memmove(to, from unsafe.Pointer, n uintptr) |
| |
| // target address (advanced as bytes are copied) |
| #define TGT R3 |
| // source address (advanced as bytes are copied) |
| #define SRC R4 |
| // length to move, in bytes |
| #define LEN R5 |
| // number of whole doublewords (8 byte units) still to move |
| #define DWORDS R6 |
| // number of tail bytes (< 8) left over after the doublewords |
| #define BYTES R7 |
| // const 16 used as index |
| #define IDX16 R8 |
| // temp used for copies, etc. |
| #define TMP R9 |
| // number of 32 byte chunks (used by the backward copy) |
| #define QWORDS R10 |
| // const 32 and 48 used as indexes (IDX32, IDX48); OCTWORDS is the number of 64 byte chunks (used by the forward copy) |
| #define IDX32 R14 |
| #define IDX48 R15 |
| #define OCTWORDS R16 |
| |
| TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24 |
| // Copies n bytes from 'from' to 'to'; copies backward when 'to' lies inside [from, from+n). |
| // Arguments arrive in registers: R3 = TGT = to, R4 = SRC = from, R5 = LEN = n. |
| // TGT and SRC are modified as the copy proceeds; nothing is returned. |
| |
| // Split LEN into whole doublewords (DWORDS) and a tail of fewer than |
| // 8 bytes (BYTES) so a more efficient move can be done |
| check: |
| ANDCC $7, LEN, BYTES // R7: tail bytes to copy (LEN & 7); sets CR0 |
| SRD $3, LEN, DWORDS // R6: double words to copy (LEN >> 3) |
| MOVFL CR0, CR3 // save CR from ANDCC in CR3; CR3[EQ] means no tail bytes |
| CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy |
| |
| // Determine overlap by subtracting dest - src and comparing against the |
| // length. This catches the cases where src and dest are in different types |
| // of storage such as stack and static to avoid doing backward move when not |
| // necessary. The compare is unsigned: when dest is below src the difference wraps to a large value. |
| |
| SUB SRC, TGT, TMP // TMP = dest - src |
| CMPU TMP, LEN, CR2 // unsigned (dest - src) < len? |
| BC 12, 8, backward // BLT CR2 backward |
| |
| // Copying forward if no overlap, largest chunks first. |
| |
| BC 12, 6, checkbytes // BEQ CR1, checkbytes: no doublewords, only tail bytes |
| SRDCC $3, DWORDS, OCTWORDS // OCTWORDS = DWORDS >> 3 = number of 64 byte chunks; sets CR0 |
| MOVD $16, IDX16 // index constant 16, also used by the < 64 byte path below |
| BEQ lt64gt8 // no 64 byte chunks: < 64 bytes |
| |
| // Prepare for moves of 64 bytes at a time. |
| |
| forward64setup: |
| DCBTST (TGT) // prepare data cache: touch destination for store |
| DCBT (SRC) |
| MOVD OCTWORDS, CTR // CTR = number of 64 byte chunks (loop count) |
| MOVD $32, IDX32 |
| MOVD $48, IDX48 |
| PCALIGN $32 |
| |
| forward64: |
| LXVD2X (R0)(SRC), VS32 // load 64 bytes as four 16 byte vectors |
| LXVD2X (IDX16)(SRC), VS33 |
| LXVD2X (IDX32)(SRC), VS34 |
| LXVD2X (IDX48)(SRC), VS35 |
| ADD $64, SRC |
| STXVD2X VS32, (R0)(TGT) // store the same 64 bytes |
| STXVD2X VS33, (IDX16)(TGT) |
| STXVD2X VS34, (IDX32)(TGT) |
| STXVD2X VS35, (IDX48)(TGT) |
| ADD $64,TGT // advance destination to the next 64 byte chunk |
| BC 16, 0, forward64 // bdnz: decrement CTR and loop while chunks remain |
| ANDCC $7, DWORDS // DWORDS = doublewords left after the 64 byte chunks (0-7); sets CR0 |
| BEQ checkbytes // none left: only tail bytes remain |
| |
| lt64gt8: |
| CMP DWORDS, $4 |
| BLT lt32gt8 |
| LXVD2X (R0)(SRC), VS32 |
| LXVD2X (IDX16)(SRC), VS33 |
| ADD $-4, DWORDS |
| STXVD2X VS32, (R0)(TGT) |
| STXVD2X VS33, (IDX16)(TGT) |
| ADD $32, SRC |
| ADD $32, TGT |
| |
| lt32gt8: |
| // At this point fewer than 4 doublewords (< 32 bytes of whole doublewords) remain; DWORDS may be 0. |
| // Move 16 bytes if at least 2 doublewords remain |
| CMP DWORDS, $2 |
| BLT lt16 |
| LXVD2X (R0)(SRC), VS32 |
| ADD $-2, DWORDS |
| STXVD2X VS32, (R0)(TGT) |
| ADD $16, SRC |
| ADD $16, TGT |
| |
| lt16: // Move 8 bytes if a doubleword remains |
| CMP DWORDS, $1 |
| BLT checkbytes |
| MOVD 0(SRC), TMP |
| ADD $8, SRC |
| MOVD TMP, 0(TGT) |
| ADD $8, TGT |
| checkbytes: |
| BC 12, 14, LR // beqlr cr3: return if BYTES == 0 (CR3 was saved from the first ANDCC) |
| lt8: // Move word (4 bytes) if BYTES >= 4 |
| CMP BYTES, $4 |
| BLT lt4 |
| MOVWZ 0(SRC), TMP |
| ADD $-4, BYTES |
| MOVW TMP, 0(TGT) |
| ADD $4, SRC |
| ADD $4, TGT |
| lt4: // Move halfword (2 bytes) if BYTES >= 2 |
| CMP BYTES, $2 |
| BLT lt2 |
| MOVHZ 0(SRC), TMP |
| ADD $-2, BYTES |
| MOVH TMP, 0(TGT) |
| ADD $2, SRC |
| ADD $2, TGT |
| lt2: // Move last byte if 1 left |
| CMP BYTES, $1 |
| BC 12, 0, LR // bltlr, return if BYTES == 0 |
| MOVBZ 0(SRC), TMP |
| MOVBZ TMP, 0(TGT) |
| RET |
| |
| backward: |
| // Copying backwards proceeds by copying the BYTES (R7) tail bytes one at a time, then copying DWORDS (R6) double words. |
| // TGT (R3) and SRC (R4) are advanced to the end of the destination/source buffers |
| // respectively and moved back as we copy. |
| |
| ADD LEN, SRC, SRC // SRC = end of source (one past the last byte) |
| ADD TGT, LEN, TGT // TGT = end of dest (one past the last byte) |
| |
| BEQ nobackwardtail // CR0 still holds the ANDCC result from check: BYTES == 0, no tail to copy |
| |
| MOVD BYTES, CTR // CTR = number of tail bytes to move |
| |
| backwardtailloop: |
| MOVBZ -1(SRC), TMP // load the byte just below SRC (last byte not yet copied) |
| SUB $1,SRC |
| MOVBZ TMP, -1(TGT) |
| SUB $1,TGT |
| BDNZ backwardtailloop |
| |
| nobackwardtail: |
| BC 4, 5, LR // blelr cr1, return if DWORDS == 0 |
| SRDCC $2,DWORDS,QWORDS // Compute number of 32B blocks (DWORDS >> 2) and compare to 0 |
| BNE backward32setup // If QWORDS != 0, start the 32B copy loop. |
| |
| backward24: |
| // DWORDS is a value between 1-3; CR0 from the CMP below selects how many doublewords are copied. |
| CMP DWORDS, $2 |
| |
| MOVD -8(SRC), TMP |
| MOVD TMP, -8(TGT) |
| BC 12, 0, LR // bltlr, return if DWORDS == 1 |
| |
| MOVD -16(SRC), TMP |
| MOVD TMP, -16(TGT) |
| BC 12, 2, LR // beqlr, return if DWORDS == 2 |
| |
| MOVD -24(SRC), TMP |
| MOVD TMP, -24(TGT) |
| RET |
| |
| backward32setup: |
| ANDCC $3,DWORDS // DWORDS = doublewords left after the 32B blocks (0-3); compare to 0 |
| MOVD QWORDS, CTR // set up loop ctr: CTR = number of 32B blocks |
| MOVD $16, IDX16 // index constant 16 for the second vector of each 32 byte block |
| |
| backward32loop: |
| SUB $32, TGT |
| SUB $32, SRC |
| LXVD2X (R0)(SRC), VS32 // load 16x2 bytes |
| LXVD2X (IDX16)(SRC), VS33 |
| STXVD2X VS32, (R0)(TGT) // store 16x2 bytes |
| STXVD2X VS33, (IDX16)(TGT) |
| BDNZ backward32loop |
| BC 12, 2, LR // beqlr, return if DWORDS == 0 (CR0 from the ANDCC in backward32setup) |
| BR backward24 |