|  | // Inferno's libkern/memmove-arm.s | 
|  | // https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s | 
|  | // | 
|  | //         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. | 
|  | //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved. | 
|  | //         Portions Copyright 2009 The Go Authors. All rights reserved. | 
|  | // | 
|  | // Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | // of this software and associated documentation files (the "Software"), to deal | 
|  | // in the Software without restriction, including without limitation the rights | 
|  | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | // copies of the Software, and to permit persons to whom the Software is | 
|  | // furnished to do so, subject to the following conditions: | 
|  | // | 
|  | // The above copyright notice and this permission notice shall be included in | 
|  | // all copies or substantial portions of the Software. | 
|  | // | 
|  | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE | 
|  | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | // THE SOFTWARE. | 
|  |  | 
|  | #include "textflag.h" | 
|  |  | 
// Register conventions for the copy loops below.
//
// TS ("to start") and TE ("to end") track the destination region. TE or TS
// are spilled to the stack during bulk register moves, because the MOVM
// register ranges used below include R0 and R8.
#define TS	R0
#define TE	R8

// Warning: the linker will use R11 to synthesize certain instructions. Please
// take care and double check with objdump.
#define FROM	R11
#define N	R12
#define TMP	R12				/* N and TMP share R12; their live ranges don't overlap */
#define TMP1	R5

// Shift amounts and byte offset used to merge words when source and
// destination have different word alignment (see _bunaligned/_funaligned).
#define RSHIFT	R5
#define LSHIFT	R6
#define OFFSET	R7

// Block registers for the backward unaligned copy: BRn are read from the
// source, BWn are written to the destination. Each read/write pair aliases
// one register so adjacent source words can be merged in place.
#define BR0	R0					/* shared with TS */
#define BW0	R1
#define BR1	R1
#define BW1	R2
#define BR2	R2
#define BW2	R3
#define BR3	R3
#define BW3	R4

// Block registers for the forward unaligned copy (same aliasing scheme).
#define FW0	R1
#define FR0	R2
#define FW1	R2
#define FR1	R3
#define FW2	R3
#define FR2	R4
#define FW3	R4
#define FR3	R8					/* shared with TE */
|  |  | 
// func memmove(to, from unsafe.Pointer, n uintptr)
//
// Copies n bytes from "from" to "to", correct for overlapping regions.
// Strategy: if to <= from (unsigned), copy forward; otherwise copy backward
// from the ends of both buffers so the overlap is never clobbered before it
// is read. Each direction first byte-copies until the destination is 4-byte
// aligned, then — if the source is word aligned too — moves 32-byte chunks
// with MOVM, then single words, then a byte tail. If the source is NOT word
// aligned, a shift-and-OR scheme merges pairs of aligned source words into
// each destination word, 16 bytes at a time.
TEXT runtime·memmove(SB), NOSPLIT, $4-12
_memmove:
	MOVW	to+0(FP), TS
	MOVW	from+4(FP), FROM
	MOVW	n+8(FP), N

	ADD	N, TS, TE	/* to end pointer */

	CMP	FROM, TS
	BLS	_forward	/* to <= from (unsigned): forward copy is overlap-safe */

_back:
	ADD	N, FROM		/* from end pointer */
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_b1tail

_b4align:				/* align destination on 4 */
	AND.S	$3, TE, TMP
	BEQ	_b4aligned

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b4align

_b4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP
	BNE	_bunaligned

	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
	MOVW	TS, savedts-4(SP)	/* spill TS: R0 is part of the MOVM range below */
_b32loop:
	CMP	TMP, TE
	BLS	_b4tail

	MOVM.DB.W (FROM), [R0-R7]	/* load 8 words, decrement-before, writeback */
	MOVM.DB.W [R0-R7], (TE)	/* store 8 words, decrement-before, writeback */
	B	_b32loop

_b4tail:				/* do remaining words if possible */
	MOVW	savedts-4(SP), TS	/* restore TS */
	ADD	$3, TS, TMP
_b4loop:
	CMP	TMP, TE
	BLS	_b1tail

	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
	B	_b4loop

_b1tail:				/* remaining bytes */
	CMP	TE, TS
	BEQ	_return

	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
	B	_b1tail

_forward:
	CMP	$4, N		/* need at least 4 bytes to copy */
	BLT	_f1tail

_f4align:				/* align destination on 4 */
	AND.S	$3, TS, TMP
	BEQ	_f4aligned

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f4align

_f4aligned:				/* is source now aligned? */
	AND.S	$3, FROM, TMP
	BNE	_funaligned

	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
	MOVW	TE, savedte-4(SP)	/* spill TE: R8 is part of the MOVM range below */
_f32loop:
	CMP	TMP, TS
	BHS	_f4tail

	MOVM.IA.W (FROM), [R1-R8]	/* load 8 words, increment-after, writeback */
	MOVM.IA.W [R1-R8], (TS)	/* store 8 words, increment-after, writeback */
	B	_f32loop

_f4tail:
	MOVW	savedte-4(SP), TE	/* restore TE */
	SUB	$3, TE, TMP	/* do remaining words if possible */
_f4loop:
	CMP	TMP, TS
	BHS	_f1tail

	MOVW.P	4(FROM), TMP1	/* implicit write back */
	MOVW.P	TMP1, 4(TS)	/* implicit write back */
	B	_f4loop

_f1tail:
	CMP	TS, TE
	BEQ	_return

	MOVBU.P	1(FROM), TMP	/* implicit write back */
	MOVBU.P	TMP, 1(TS)	/* implicit write back */
	B	_f1tail

_return:
	/* NOTE(review): R0 is loaded with "to" before returning — presumably so
	   callers expecting a C-memmove-style destination return value get one;
	   confirm against the callers before relying on it. */
	MOVW	to+0(FP), R0
	RET

_bunaligned:
	/* Backward copy, source misaligned relative to the (now 4-aligned)
	   destination. TMP = FROM & 3 from _b4aligned; the CMP below steers the
	   conditional MOVW chain that picks the shift pair so each destination
	   word is (higher source word << LSHIFT) | (lower source word >> RSHIFT).
	   OFFSET is the byte count needed later to undo the BIC of FROM. */
	CMP	$2, TMP		/* is TMP < 2 ? */

	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$1, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$3, OFFSET

	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TE
	BLS	_b1tail		/* too small for chunking; byte-copy everything */

	BIC	$3, FROM		/* align source */
	MOVW	TS, savedts-4(SP)	/* spill TS: R0 (BR0) is clobbered below */
	MOVW	(FROM), BR0	/* prime first block register */

_bu16loop:
	CMP	TMP, TE
	BLS	_bu1tail

	MOVW	BR0<<LSHIFT, BW3	/* bytes carried over from the previous block */
	MOVM.DB.W (FROM), [BR0-BR3]	/* load next 4 aligned source words, descending */
	ORR	BR3>>RSHIFT, BW3

	MOVW	BR3<<LSHIFT, BW2
	ORR	BR2>>RSHIFT, BW2

	MOVW	BR2<<LSHIFT, BW1
	ORR	BR1>>RSHIFT, BW1

	MOVW	BR1<<LSHIFT, BW0
	ORR	BR0>>RSHIFT, BW0

	MOVM.DB.W [BW0-BW3], (TE)	/* store 4 merged words, descending */
	B	_bu16loop

_bu1tail:
	MOVW	savedts-4(SP), TS	/* restore TS */
	ADD	OFFSET, FROM	/* undo the BIC source alignment */
	B	_b1tail

_funaligned:
	/* Forward copy, source misaligned relative to the destination; mirror
	   image of _bunaligned. TMP = FROM & 3 from _f4aligned. */
	CMP	$2, TMP

	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
	MOVW.LT	$24, LSHIFT
	MOVW.LT	$3, OFFSET

	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
	MOVW.EQ	$16, LSHIFT
	MOVW.EQ	$2, OFFSET

	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
	MOVW.GT	$8, LSHIFT
	MOVW.GT	$1, OFFSET

	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
	CMP	TMP, TS
	BHS	_f1tail		/* too small for chunking; byte-copy everything */

	BIC	$3, FROM		/* align source */
	MOVW	TE, savedte-4(SP)	/* spill TE: R8 (FR3) is clobbered below */
	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */

_fu16loop:
	CMP	TMP, TS
	BHS	_fu1tail

	MOVW	FR3>>RSHIFT, FW0	/* bytes carried over from the previous block */
	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]	/* load next 4 aligned source words, ascending */
	ORR	FR0<<LSHIFT, FW0

	MOVW	FR0>>RSHIFT, FW1
	ORR	FR1<<LSHIFT, FW1

	MOVW	FR1>>RSHIFT, FW2
	ORR	FR2<<LSHIFT, FW2

	MOVW	FR2>>RSHIFT, FW3
	ORR	FR3<<LSHIFT, FW3

	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)	/* store 4 merged words, ascending */
	B	_fu16loop

_fu1tail:
	MOVW	savedte-4(SP), TE	/* restore TE */
	SUB	OFFSET, FROM	/* undo the BIC alignment and the priming load of FROM */
	B	_f1tail