// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "textflag.h"

// See memclrNoHeapPointers Go doc for important implementation constraints.

// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16
	// R3 = ptr
	// R4 = n
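	//
	// Large (>= 512 byte) clears align ptr to 128 bytes and then use
	// DCBZ, which zeroes an entire cache block (assumed to be 128
	// bytes here) per instruction. Smaller sizes are cleared with
	// 16 byte VSX stores and scalar stores for the tail, or a single
	// STXVL on power10.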

	// Determine if there are doublewords to clear
check:
	ANDCC $7, R4, R5  // R5: leftover bytes to clear
	SRD   $3, R4, R6  // R6: double words to clear
	CMP   R6, $0, CR1 // CR1[EQ] set if no double words

	BC    12, 6, nozerolarge // branch if CR1[EQ] set: no doublewords, only single bytes
	CMP   R4, $512
	BLT   under512           // special case for < 512
	ANDCC $127, R3, R8       // check for 128 alignment of address
	BEQ   zero512setup

	ANDCC $7, R3, R15
	BEQ   zero512xsetup // at least 8 byte aligned

	// zero bytes up to 8 byte alignment

	ANDCC $1, R3, R15 // check for byte alignment
	BEQ   byte2
	MOVB  R0, 0(R3)   // zero 1 byte
	ADD   $1, R3      // bump ptr by 1
	ADD   $-1, R4

byte2:
	ANDCC $2, R3, R15 // check for 2 byte alignment
	BEQ   byte4
	MOVH  R0, 0(R3)   // zero 2 bytes
	ADD   $2, R3      // bump ptr by 2
	ADD   $-2, R4

byte4:
	ANDCC $4, R3, R15   // check for 4 byte alignment
	BEQ   zero512xsetup
	MOVW  R0, 0(R3)     // zero 4 bytes
	ADD   $4, R3        // bump ptr by 4
	ADD   $-4, R4
	BR    zero512xsetup // ptr should now be 8 byte aligned

under512:
	SRDCC $3, R6, R7        // R7: count of 64 byte chunks
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
	BEQ   lt64gt8

	// Prepare to clear 64 bytes at a time.

zero64setup:
	DCBTST (R3)             // prepare data cache
	MOVD   R7, CTR          // number of 64 byte chunks
	MOVD   $16, R8
	MOVD   $32, R16
	MOVD   $48, R17

zero64:
	STXVD2X VS32, (R3+R0)   // store 16 bytes
	STXVD2X VS32, (R3+R8)
	STXVD2X VS32, (R3+R16)
	STXVD2X VS32, (R3+R17)
	ADD     $64, R3
	ADD     $-64, R4
	BDNZ    zero64          // dec ctr, br zero64 if ctr not 0
	SRDCC   $3, R4, R6      // remaining doublewords
	BEQ     nozerolarge

lt64gt8:
	CMP	R4, $32
	BLT	lt32gt8
	MOVD	$16, R8
	STXVD2X	VS32, (R3+R0)
	STXVD2X	VS32, (R3+R8)
	ADD	$-32, R4
	ADD	$32, R3
lt32gt8:
	CMP	R4, $16
	BLT	lt16gt8
	STXVD2X	VS32, (R3+R0)
	ADD	$16, R3
	ADD	$-16, R4
lt16gt8:
#ifdef GOPPC64_power10
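	// STXVL stores the number of bytes given in the high-order byte
	// of R7, so shift the remaining length into bits 0:7.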
	SLD	$56, R4, R7
	STXVL   V0, R3, R7
	RET
#else
	CMP	R4, $8
	BLT	nozerolarge
	MOVD	R0, 0(R3)
	ADD	$8, R3
	ADD	$-8, R4
#endif
nozerolarge:
	ANDCC $7, R4, R5 // any remaining bytes
	BC    4, 1, LR   // ble lr
#ifdef GOPPC64_power10
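	// As above, STXVL takes its byte count from the top byte of R7.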
	XXLXOR  VS32, VS32, VS32 // clear VS32 (V0)
	SLD	$56, R5, R7
	STXVL   V0, R3, R7
	RET
#else
	CMP   R5, $4
	BLT   next2
	MOVW  R0, 0(R3)
	ADD   $4, R3
	ADD   $-4, R5
next2:
	CMP   R5, $2
	BLT   next1
	MOVH  R0, 0(R3)
	ADD   $2, R3
	ADD   $-2, R5
next1:
	CMP   R5, $0
	BC    12, 2, LR      // beqlr
	MOVB  R0, 0(R3)
	RET
#endif

zero512xsetup:  // 512 chunk with extra needed
	ANDCC $8, R3, R11    // 8 byte alignment?
	BEQ   zero512setup16
	MOVD  R0, 0(R3)      // clear 8 bytes
	ADD   $8, R3         // update ptr to next 8
	ADD   $-8, R4        // dec count by 8

zero512setup16:
	ANDCC $127, R3, R14 // R14: offset from 128 byte alignment
	BEQ   zero512setup  // handle 128 byte alignment
	MOVD  $128, R15
	SUB   R14, R15, R14 // find increment to 128 alignment
	SRD   $4, R14, R15  // number of 16 byte chunks
	MOVD   R15, CTR         // loop counter of 16 bytes
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)

zero512preloop:  // clear up to 128 alignment
	STXVD2X VS32, (R3+R0)         // clear 16 bytes
	ADD     $16, R3               // update ptr
	ADD     $-16, R4              // dec count
	BDNZ    zero512preloop

zero512setup:  // setup for dcbz loop
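	// R3 is now 128 byte aligned. DCBZ zeroes one full cache block
	// (assumed 128 bytes), so each pass of the loop below clears
	// 512 bytes.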
	CMP  R4, $512   // check if at least 512
	BLT  remain
	SRD  $9, R4, R8 // loop count for 512 chunks
	MOVD R8, CTR    // set up counter
	MOVD $128, R9   // index regs for 128 bytes
	MOVD $256, R10
	MOVD $384, R11
	PCALIGN $16
zero512:
	DCBZ (R3+R0)        // clear first chunk
	DCBZ (R3+R9)        // clear second chunk
	DCBZ (R3+R10)       // clear third chunk
	DCBZ (R3+R11)       // clear fourth chunk
	ADD  $512, R3
	BDNZ zero512
	ANDCC $511, R4      // R4: remaining bytes (< 512)

remain:
	CMP  R4, $128  // check if 128 byte chunks left
	BLT  smaller
	DCBZ (R3+R0)   // clear 128
	ADD  $128, R3
	ADD  $-128, R4
	BR   remain

smaller:
	ANDCC $127, R4, R7 // find leftovers
	BEQ   done
	CMP   R7, $64      // at least 64? do 64 at a time
	XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
	BLT   lt64gt8      // less than 64
	SRD   $6, R7, R7   // set up counter for 64
	BR    zero64setup

done:
	RET