blob: 18b9c850f240f3ca3021378ed826abd188e06001 [file] [log] [blame]
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le
#include "textflag.h"
// See memmove Go doc for important implementation constraints.
// func memmove(to, from unsafe.Pointer, n uintptr)
// Register aliases used by memmove below.
// target address (advanced as the copy proceeds)
#define TGT R3
// source address (advanced as the copy proceeds)
#define SRC R4
// length to move, in bytes
#define LEN R5
// number of doublewords (8 byte units) still to move
#define DWORDS R6
// number of bytes < 8 (tail left over after the doublewords)
#define BYTES R7
// const 16 used as index
#define IDX16 R8
// temp used for copies, etc.
#define TMP R9
// number of 32 byte blocks (backward copy loop)
#define QWORDS R10
// const 32 used as index (forward 64 byte loop)
#define IDX32 R14
// const 48 used as index (forward 64 byte loop)
#define IDX48 R15
// number of 64 byte chunks (forward copy loop)
#define OCTWORDS R16
TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
// Copy LEN bytes from SRC to TGT. The two regions may overlap: the copy
// runs forward unless TGT lies inside [SRC, SRC+LEN), in which case it
// runs backward from the end of both buffers.
//
// Arguments arrive in registers:
// R3 = TGT = to
// R4 = SRC = from
// R5 = LEN = n
//
// Registers written: R3, R4, R6-R10, R14-R16, VS32-VS35 (and V0 when
// built with GOPPC64_power10), CTR, and condition fields CR0-CR3.
//
// Condition field roles once mcopy has run:
//   CR0 - result of the most recent compare/record-form instruction
//   CR1 - DWORDS compared with 0
//   CR2 - unsigned (TGT - SRC) compared with LEN
//   CR3 - copy of CR0 after "ANDCC $7, LEN", i.e. EQ means BYTES == 0
//
// Determine if there are doublewords to
// copy so a more efficient move can be done
check:
#ifdef GOPPC64_power10
// Lengths up to 16 bytes are moved with a single variable-length vector
// load followed by a single variable-length vector store. The source is
// read completely before anything is written, so overlap is harmless.
CMP LEN, $16
BGT mcopy
SLD $56, LEN, TMP // byte count goes in the top byte of TMP for LXVL/STXVL
LXVL SRC, TMP, V0
STXVL V0, TGT, TMP
RET
#endif
mcopy:
ANDCC $7, LEN, BYTES // R7: bytes to copy (LEN & 7), sets CR0
SRD $3, LEN, DWORDS // R6: double words to copy (LEN >> 3)
MOVFL CR0, CR3 // save CR from ANDCC; CR0 is overwritten on the forward path
CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy
// Determine overlap by subtracting dest - src and comparing against the
// length. This catches the cases where src and dest are in different types
// of storage such as stack and static to avoid doing backward move when not
// necessary.
// The compare is unsigned, so when dest is below src the difference wraps
// to a large value and the forward path is taken.
SUB SRC, TGT, TMP // dest - src
CMPU TMP, LEN, CR2 // < len?
BC 12, 8, backward // BLT CR2 backward
// Copying forward if no overlap.
BC 12, 6, checkbytes // BEQ CR1, checkbytes: no doublewords, only the tail
SRDCC $3, DWORDS, OCTWORDS // 64 byte chunks? (DWORDS >> 3, sets CR0)
MOVD $16, IDX16 // index constant; does not disturb CR0
BEQ lt64gt8 // < 64 bytes
// Prepare for moves of 64 bytes at a time.
forward64setup:
DCBTST (TGT) // prepare data cache: touch-for-store hint on the target
DCBT (SRC) // touch hint on the source
MOVD OCTWORDS, CTR // Number of 64 byte chunks
MOVD $32, IDX32
MOVD $48, IDX48
PCALIGN $16
// Each iteration loads all 64 bytes before storing any of them.
forward64:
LXVD2X (R0)(SRC), VS32 // load 64 bytes
LXVD2X (IDX16)(SRC), VS33
LXVD2X (IDX32)(SRC), VS34
LXVD2X (IDX48)(SRC), VS35
ADD $64, SRC
STXVD2X VS32, (R0)(TGT) // store 64 bytes
STXVD2X VS33, (IDX16)(TGT)
STXVD2X VS34, (IDX32)(TGT)
STXVD2X VS35, (IDX48)(TGT)
ADD $64,TGT // bump up for next set
BC 16, 0, forward64 // continue: decrement CTR, loop while nonzero
ANDCC $7, DWORDS // remaining doublewords (0-7) after the 64 byte chunks
BEQ checkbytes // only bytes remain
// Here DWORDS is between 1 and 7.
lt64gt8:
CMP DWORDS, $4
BLT lt32gt8
// Move 32 bytes.
LXVD2X (R0)(SRC), VS32
LXVD2X (IDX16)(SRC), VS33
ADD $-4, DWORDS
STXVD2X VS32, (R0)(TGT)
STXVD2X VS33, (IDX16)(TGT)
ADD $32, SRC
ADD $32, TGT
lt32gt8:
// At this point >= 8 and < 32
// Move 16 bytes if possible
CMP DWORDS, $2
BLT lt16
LXVD2X (R0)(SRC), VS32
ADD $-2, DWORDS
STXVD2X VS32, (R0)(TGT)
ADD $16, SRC
ADD $16, TGT
lt16: // Move 8 bytes if possible; DWORDS is 0 or 1 here
CMP DWORDS, $1
BLT checkbytes
#ifdef GOPPC64_power10
// One doubleword plus the tail (8 + BYTES <= 15 bytes) is finished with
// a single variable-length vector load/store pair.
ADD $8, BYTES
SLD $56, BYTES, TMP // byte count goes in the top byte of TMP
LXVL SRC, TMP, V0
STXVL V0, TGT, TMP
RET
#endif
// Without GOPPC64_power10: move the last doubleword with a 64-bit
// load/store, then fall through to handle the tail bytes.
MOVD 0(SRC), TMP
ADD $8, SRC
MOVD TMP, 0(TGT)
ADD $8, TGT
checkbytes:
BC 12, 14, LR // BEQ lr: return if CR3[EQ], i.e. BYTES == 0
#ifdef GOPPC64_power10
// Move the 1-7 tail bytes with one variable-length vector load/store.
SLD $56, BYTES, TMP
LXVL SRC, TMP, V0
STXVL V0, TGT, TMP
RET
#endif
// Without GOPPC64_power10 the tail (1-7 bytes) is moved as an optional
// word, an optional halfword and an optional byte.
lt8: // Move word if possible
CMP BYTES, $4
BLT lt4
MOVWZ 0(SRC), TMP
ADD $-4, BYTES
MOVW TMP, 0(TGT)
ADD $4, SRC
ADD $4, TGT
lt4: // Move halfword if possible
CMP BYTES, $2
BLT lt2
MOVHZ 0(SRC), TMP
ADD $-2, BYTES
MOVH TMP, 0(TGT)
ADD $2, SRC
ADD $2, TGT
lt2: // Move last byte if 1 left
CMP BYTES, $1
BC 12, 0, LR // bltlr, return if BYTES == 0
MOVBZ 0(SRC), TMP
MOVBZ TMP, 0(TGT)
RET
backward:
// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
// R3 and R4 are advanced to the end of the destination/source buffers
// respectively and moved back as we copy.
ADD LEN, SRC, SRC // end of source
ADD TGT, LEN, TGT // end of dest
// CR0 still holds the result of "ANDCC $7, LEN" from mcopy: nothing on the
// path from there to here writes CR0.
BEQ nobackwardtail // earlier condition: BYTES == 0, no tail to move
MOVD BYTES, CTR // bytes to move
// Move the tail one byte at a time, from the highest address downward.
backwardtailloop:
MOVBZ -1(SRC), TMP // load the last byte not yet copied
SUB $1,SRC
MOVBZ TMP, -1(TGT)
SUB $1,TGT
BDNZ backwardtailloop
nobackwardtail:
BC 4, 5, LR // blelr cr1, return if DWORDS == 0
SRDCC $2,DWORDS,QWORDS // Compute number of 32B blocks and compare to 0
BNE backward32setup // If QWORDS != 0, start the 32B copy loop.
backward24:
// DWORDS is a value between 1-3.
// The compare result in CR0 is used by both conditional returns below;
// the loads and stores in between do not modify it.
CMP DWORDS, $2
MOVD -8(SRC), TMP
MOVD TMP, -8(TGT)
BC 12, 0, LR // bltlr, return if DWORDS == 1
MOVD -16(SRC), TMP
MOVD TMP, -16(TGT)
BC 12, 2, LR // beqlr, return if DWORDS == 2
MOVD -24(SRC), TMP
MOVD TMP, -24(TGT)
RET
backward32setup:
// CR0 set here is tested only after the loop; no instruction inside the
// loop writes CR0.
ANDCC $3,DWORDS // Compute remaining DWORDS and compare to 0
MOVD QWORDS, CTR // set up loop ctr
MOVD $16, IDX16 // 32 bytes at a time
PCALIGN $16
// Each iteration steps both pointers back 32 bytes and loads the whole
// block before storing any of it.
backward32loop:
SUB $32, TGT
SUB $32, SRC
LXVD2X (R0)(SRC), VS32 // load 16x2 bytes
LXVD2X (IDX16)(SRC), VS33
STXVD2X VS32, (R0)(TGT) // store 16x2 bytes
STXVD2X VS33, (IDX16)(TGT)
BDNZ backward32loop
BC 12, 2, LR // beqlr, return if DWORDS == 0
BR backward24 // 1-3 doublewords remain below the blocks just copied