// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64 ppc64le

#include "textflag.h"
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from 'from' to 'to'. The two regions may overlap: when the
// destination starts inside the source (dest - src < n, unsigned) the copy
// runs from the high addresses down, otherwise from the low addresses up.
//
// Register use:
//	R3          destination cursor
//	R4          source cursor
//	R5          n, the total byte count (never modified)
//	R6          number of doublewords (8-byte units) still to copy
//	R7          number of tail bytes (n & 7)
//	R8, R9      scratch
//	VS32, VS33  16-byte staging registers for the 32-byte loops
//	CTR         loop counter for every copy loop
//	CR0         scratch condition field
//	CR1         result of comparing the doubleword count with 0
//	CR2         result of the overlap test
//
// The indexed VSX accesses use R0 as the zero index.
// NOTE(review): this relies on R0 holding 0 on entry, as the Go ppc64
// assembler conventions state; confirm if this code is reused elsewhere.
TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
	MOVD	to+0(FP), R3
	MOVD	from+8(FP), R4
	MOVD	n+16(FP), R5

	// Split the length into doublewords and tail bytes so that the bulk of
	// the copy can be done 8 (or 32) bytes at a time.
	ANDCC	$7, R5, R7		// R7: tail bytes; CR0[EQ] set if there are none
	SRD	$3, R5, R6		// R6: doublewords to copy
	CMP	R6, $0, CR1		// CR1[EQ] set if no doublewords to copy

	// Determine overlap by subtracting dest - src and comparing the result,
	// unsigned, against the length. This catches the cases where src and dest
	// are in different types of storage, such as stack and static, and avoids
	// doing a backward move when it is not necessary.
	SUB	R4, R3, R8		// R8 = dest - src
	CMPU	R8, R5, CR2		// dest - src < n ?
	BC	12, 8, backward		// blt CR2, backward

	// No harmful overlap: copy forward, doublewords first, tail bytes last.
	BC	12, 6, noforwardlarge	// beq CR1: no doublewords at all
	SRDCC	$2, R6, R8		// R8 = number of 32-byte chunks
	BNE	forward32setup		// at least one 32-byte chunk
	MOVD	R6, CTR			// fewer than 4 doublewords: copy them singly

	// Copy CTR doublewords forward.
forward8:
	MOVD	0(R4), R8
	ADD	$8, R4
	MOVD	R8, 0(R3)
	ADD	$8, R3
	BC	16, 0, forward8		// bdnz
	BR	noforwardlarge		// handle the tail bytes

	// Prepare for moves of 32 bytes at a time.
forward32setup:
	DCBTST	(R3)			// prepare data cache for the stores
	MOVD	R8, CTR			// 32-byte chunk count
	MOVD	$16, R8			// index of the second 16-byte half

forward32:
	LXVD2X	(R4+R0), VS32		// load 2 x 16 bytes
	LXVD2X	(R4+R8), VS33
	ADD	$32, R4
	STXVD2X	VS32, (R3+R0)		// store 2 x 16 bytes
	STXVD2X	VS33, (R3+R8)
	ADD	$32, R3			// bump up for next set
	BC	16, 0, forward32	// bdnz
	RLDCLCC	$61, R5, $3, R6		// R6 = (n >> 3) & 3: remaining doublewords
	BEQ	noforwardlarge
	MOVD	R6, CTR			// copy the 1..3 leftover doublewords
	BR	forward8

noforwardlarge:
	CMP	R7, $0			// any remaining bytes?
	BC	4, 1, LR		// ble: nothing left, return
	MOVD	R7, CTR			// tail byte count

forwardtailloop:
	MOVBZ	0(R4), R8		// move single bytes
	ADD	$1, R4
	MOVBZ	R8, 0(R3)
	ADD	$1, R3
	BC	16, 0, forwardtailloop	// bdnz
	RET

	// Copying backwards proceeds by copying R7 bytes then copying R6 double
	// words. R3 and R4 are advanced to the end of the destination/source
	// buffers respectively and moved back as we copy.
backward:
	ADD	R5, R4, R4		// end of source
	ADD	R3, R5, R3		// end of dest
	BEQ	nobackwardtail		// CR0 still holds the n & 7 result from above
	MOVD	R7, CTR			// bytes to move

backwardtailloop:
	MOVBZ	-1(R4), R8		// last byte not yet copied
	SUB	$1, R4
	MOVBZ	R8, -1(R3)
	SUB	$1, R3
	BC	16, 0, backwardtailloop	// bdnz

nobackwardtail:
	BC	4, 5, LR		// ble CR1: no doublewords, return
	MOVD	R6, CTR			// doubleword count for backwardlargeloop

	// Use VSX only when there is at least one 32-byte chunk and the two
	// buffers are at least 32 bytes apart. The distance is dest - src, which
	// is non-negative on this path.
	SUB	R4, R3, R9		// R9 = dest - src
	CMP	R9, $32
	BLT	backwardlargeloop	// too close: copy doublewords only
	SRDCC	$2, R6, R8		// R8 = number of 32-byte chunks
	BNE	backward32setup

	// Copy CTR doublewords backward.
backwardlargeloop:
	MOVD	-8(R4), R8
	SUB	$8, R4
	MOVD	R8, -8(R3)
	SUB	$8, R3
	BC	16, 0, backwardlargeloop	// bdnz
	RET

backward32setup:
	MOVD	R8, CTR			// 32-byte chunk count
	MOVD	$16, R8			// index of the second 16-byte half
	// Only the doublewords not covered by whole 32-byte chunks may be copied
	// afterwards. Nothing in the loop below writes CR0, so the result of this
	// test is still valid when the loop finishes.
	ANDCC	$3, R6, R6		// R6 = leftover doublewords; CR0[EQ] if none

backward32loop:
	SUB	$32, R4
	SUB	$32, R3
	LXVD2X	(R4+R0), VS32		// load 2 x 16 bytes
	LXVD2X	(R4+R8), VS33
	STXVD2X	VS32, (R3+R0)		// store 2 x 16 bytes
	STXVD2X	VS33, (R3+R8)
	BC	16, 0, backward32loop	// bdnz
	BC	12, 2, LR		// beq CR0: no leftover doublewords, return
	MOVD	R6, CTR			// copy the 1..3 leftover doublewords
	BR	backwardlargeloop