|  | // Copyright 2014 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | #include "textflag.h" | 
|  |  | 
|  | // See memmove Go doc for important implementation constraints. | 
|  |  | 
|  | // func memmove(to, from unsafe.Pointer, n uintptr) | 
|  | TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24 | 
|  | MOVD	to+0(FP), R3 | 
|  | MOVD	from+8(FP), R4 | 
|  | MOVD	n+16(FP), R5 | 
|  | CBNZ	R5, check | 
|  | RET | 
|  |  | 
|  | check: | 
|  | CMP	$16, R5 | 
|  | BLE	copy16 | 
|  |  | 
|  | AND	$~31, R5, R7	// R7 is N&~31 | 
|  | SUB	R7, R5, R6	// R6 is N&31 | 
|  |  | 
|  | CMP	R3, R4 | 
|  | BLT	backward | 
|  |  | 
|  | // Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes. | 
|  | // R3 and R4 are advanced as we copy. | 
|  |  | 
|  | // (There may be implementations of armv8 where copying by bytes until | 
|  | // at least one of source or dest is word aligned is a worthwhile | 
|  | // optimization, but the on the one tested so far (xgene) it did not | 
|  | // make a significance difference.) | 
|  |  | 
|  | CBZ	R7, noforwardlarge	// Do we need to do any quadword copying? | 
|  |  | 
|  | ADD	R3, R7, R9	// R9 points just past where we copy by word | 
|  |  | 
|  | forwardlargeloop: | 
|  | // Copy 32 bytes at a time. | 
|  | LDP.P	32(R4), (R8, R10) | 
|  | STP.P	(R8, R10), 32(R3) | 
|  | LDP	-16(R4), (R11, R12) | 
|  | STP	(R11, R12), -16(R3) | 
|  | SUB 	$32, R7, R7 | 
|  | CBNZ	R7, forwardlargeloop | 
|  |  | 
|  | noforwardlarge: | 
|  | CBNZ	R6, forwardtail		// Do we need to copy any tail bytes? | 
|  | RET | 
|  |  | 
|  | forwardtail: | 
|  | // There are R6 <= 31 bytes remaining to copy. | 
|  | // This is large enough to still contain pointers, | 
|  | // which must be copied atomically. | 
|  | // Copy the next 16 bytes, then 8 bytes, then any remaining bytes. | 
|  | TBZ	$4, R6, 3(PC)	// write 16 bytes if R6&16 != 0 | 
|  | LDP.P	16(R4), (R8, R10) | 
|  | STP.P	(R8, R10), 16(R3) | 
|  |  | 
|  | TBZ	$3, R6, 3(PC)	// write 8 bytes if R6&8 != 0 | 
|  | MOVD.P	8(R4), R8 | 
|  | MOVD.P	R8, 8(R3) | 
|  |  | 
|  | AND	$7, R6 | 
|  | CBNZ	R6, 2(PC) | 
|  | RET | 
|  |  | 
|  | ADD	R3, R6, R9	// R9 points just past the destination memory | 
|  |  | 
|  | forwardtailloop: | 
|  | MOVBU.P 1(R4), R8 | 
|  | MOVBU.P	R8, 1(R3) | 
|  | CMP	R3, R9 | 
|  | BNE	forwardtailloop | 
|  | RET | 
|  |  | 
|  | // Small copies: 1..16 bytes. | 
|  | copy16: | 
|  | ADD	R4, R5, R8	// R8 points just past the last source byte | 
|  | ADD	R3, R5, R9	// R9 points just past the last destination byte | 
|  | CMP	$8, R5 | 
|  | BLT	copy7 | 
|  | MOVD	(R4), R6 | 
|  | MOVD	-8(R8), R7 | 
|  | MOVD	R6, (R3) | 
|  | MOVD	R7, -8(R9) | 
|  | RET | 
|  |  | 
|  | copy7: | 
|  | TBZ	$2, R5, copy3 | 
|  | MOVWU	(R4), R6 | 
|  | MOVWU	-4(R8), R7 | 
|  | MOVW	R6, (R3) | 
|  | MOVW	R7, -4(R9) | 
|  | RET | 
|  |  | 
|  | copy3: | 
|  | TBZ	$1, R5, copy1 | 
|  | MOVHU	(R4), R6 | 
|  | MOVHU	-2(R8), R7 | 
|  | MOVH	R6, (R3) | 
|  | MOVH	R7, -2(R9) | 
|  | RET | 
|  |  | 
|  | copy1: | 
|  | MOVBU	(R4), R6 | 
|  | MOVB	R6, (R3) | 
|  | RET | 
|  |  | 
|  | backward: | 
|  | // Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords. | 
|  | // R3 and R4 are advanced to the end of the destination/source buffers | 
|  | // respectively and moved back as we copy. | 
|  |  | 
|  | ADD	R4, R5, R4	// R4 points just past the last source byte | 
|  | ADD	R3, R5, R3	// R3 points just past the last destination byte | 
|  |  | 
|  | CBZ	R6, nobackwardtail	// Do we need to do any byte-by-byte copying? | 
|  |  | 
|  | AND	$7, R6, R12 | 
|  | CBZ	R12, backwardtaillarge | 
|  |  | 
|  | SUB	R12, R3, R9	// R9 points at the lowest destination byte that should be copied by byte. | 
|  | backwardtailloop: | 
|  | // Copy sub-pointer-size tail. | 
|  | MOVBU.W	-1(R4), R8 | 
|  | MOVBU.W	R8, -1(R3) | 
|  | CMP	R9, R3 | 
|  | BNE	backwardtailloop | 
|  |  | 
|  | backwardtaillarge: | 
|  | // Do 8/16-byte write if possible. | 
|  | // See comment at forwardtail. | 
|  | TBZ	$3, R6, 3(PC) | 
|  | MOVD.W	-8(R4), R8 | 
|  | MOVD.W	R8, -8(R3) | 
|  |  | 
|  | TBZ	$4, R6, 3(PC) | 
|  | LDP.W	-16(R4), (R8, R10) | 
|  | STP.W	(R8, R10), -16(R3) | 
|  |  | 
|  | nobackwardtail: | 
|  | CBNZ     R7, backwardlarge	// Do we need to do any doubleword-by-doubleword copying? | 
|  | RET | 
|  |  | 
|  | backwardlarge: | 
|  | SUB	R7, R3, R9	// R9 points at the lowest destination byte | 
|  |  | 
|  | backwardlargeloop: | 
|  | LDP	-16(R4), (R8, R10) | 
|  | STP	(R8, R10), -16(R3) | 
|  | LDP.W	-32(R4), (R11, R12) | 
|  | STP.W	(R11, R12), -32(R3) | 
|  | CMP	R9, R3 | 
|  | BNE	backwardlargeloop | 
|  | RET |