| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ppc64 || ppc64le |
| |
| #include "textflag.h" |
| |
| // memclrNoHeapPointers zeroes n bytes starting at ptr. See the memclrNoHeapPointers Go doc for important implementation constraints. |
| |
| // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) |
| TEXT runtime·memclrNoHeapPointers<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-16 |
| // R3 = ptr: address of the next byte to clear; advanced as bytes are cleared |
| // R4 = n: bytes still to clear (the final 1..7 byte tail is counted in R5 instead) |
| |
| // Dispatch on size: n < 8 goes straight to the byte tail, n < 512 uses 16-byte vector stores, n >= 512 aligns ptr to 128 bytes and then uses DCBZ |
| check: |
| ANDCC $7, R4, R5 // R5 = n & 7: leftover bytes after the whole doublewords |
| SRD $3, R4, R6 // R6 = n >> 3: number of whole doublewords (8 bytes each) |
| CMP R6, $0, CR1 // CR1[EQ] set when R6 == 0, i.e. n < 8 |
| |
| BC 12, 6, nozerolarge // branch if CR1[EQ]: fewer than 8 bytes, only the byte tail to clear |
| CMP R4, $512 |
| BLT under512 // n < 512 (compare on the line above): take the vector-store path |
| ANDCC $127, R3, R8 // R8 = ptr & 127; CR0[EQ] if ptr is already 128-byte aligned, in which case the next line goes straight to the DCBZ setup |
| BEQ zero512setup |
| |
| ANDCC $7, R3, R15 |
| BEQ zero512xsetup // ptr & 7 == 0 (tested on the line above): already 8-byte aligned, skip the byte fix-up |
| |
| // Not 8-byte aligned: clear 1, 2 and/or 4 bytes as needed so ptr becomes 8-byte aligned. n >= 512 on this path, so these stores stay inside the buffer. |
| |
| ANDCC $1, R3, R15 // bit 0 of ptr set (odd address)? |
| BEQ byte2 |
| MOVB R0, 0(R3) // store one zero byte (R0 is the zero source; presumably always 0 per the Go ppc64 register convention) |
| ADD $1, R3 // ptr += 1; the next line does n -= 1 |
| ADD $-1, R4 |
| |
| byte2: |
| ANDCC $2, R3, R15 // bit 1 of ptr set (not yet 4-byte aligned)? |
| BEQ byte4 |
| MOVH R0, 0(R3) // store a zero halfword (2 bytes) |
| ADD $2, R3 // ptr += 2; the next line does n -= 2 |
| ADD $-2, R4 |
| |
| byte4: |
| ANDCC $4, R3, R15 // bit 2 of ptr set (not yet 8-byte aligned)? |
| BEQ zero512xsetup |
| MOVW R0, 0(R3) // store a zero word (4 bytes) |
| ADD $4, R3 // ptr += 4; the next line does n -= 4 |
| ADD $-4, R4 |
| BR zero512xsetup // ptr is now 8-byte aligned; continue aligning up to 128 bytes |
| |
| under512: |
| SRDCC $3, R6, R7 // R7 = doublewords >> 3 = number of 64-byte chunks; CR0[EQ] if there are none |
| XXLXOR VS32, VS32, VS32 // VS32 (V0) = 0: the 16-byte zero source for the vector stores; the BEQ below still consumes CR0 from the SRDCC above |
| BEQ lt64gt8 |
| |
| // Prepare to clear 64 bytes at a time: CTR = chunk count from R7; R8, R16, R17 = byte offsets 16, 32, 48 for the indexed stores. Also entered from smaller with R7 already set. |
| |
| zero64setup: |
| DCBTST (R3) // data cache block touch for store: hint that the block at ptr is about to be written |
| MOVD R7, CTR // CTR = number of 64-byte chunks |
| MOVD $16, R8 |
| MOVD $32, R16 |
| MOVD $48, R17 |
| |
| zero64: |
| STXVD2X VS32, (R3+R0) // first of four 16-byte zero stores, at ptr+0, ptr+16, ptr+32 and ptr+48 |
| STXVD2X VS32, (R3+R8) |
| STXVD2X VS32, (R3+R16) |
| STXVD2X VS32, (R3+R17) |
| ADD $64, R3 |
| ADD $-64, R4 |
| BDNZ zero64 // CTR--; loop back to zero64 while CTR != 0 |
| SRDCC $3, R4, R6 // R6 = remaining doublewords (n >> 3, n < 64 here); if zero (next line) only the byte tail is left, otherwise fall through to the 32/16/8-byte ladder |
| BEQ nozerolarge |
| |
| lt64gt8: |
| CMP R4, $32 |
| BLT lt32gt8 |
| MOVD $16, R8 |
| STXVD2X VS32, (R3+R0) |
| STXVD2X VS32, (R3+R8) |
| ADD $-32, R4 |
| ADD $32, R3 |
| lt32gt8: |
| CMP R4, $16 |
| BLT lt16gt8 |
| STXVD2X VS32, (R3+R0) |
| ADD $16, R3 |
| ADD $-16, R4 |
| lt16gt8: |
| #ifdef GOPPC64_power10 |
| SLD $56, R4, R7 |
| STXVL V0, R3, R7 |
| RET |
| #else |
| CMP R4, $8 |
| BLT nozerolarge |
| MOVD R0, 0(R3) |
| ADD $8, R3 |
| ADD $-8, R4 |
| #endif |
| nozerolarge: |
| ANDCC $7, R4, R5 // R5 = n & 7: bytes left after all doubleword-or-larger stores |
| BC 4, 1, LR // return if CR0[GT] is clear, i.e. R5 == 0 (ble lr) |
| #ifdef GOPPC64_power10 |
| XXLXOR VS32, VS32, VS32 // VS32 (V0) = 0 (not yet zeroed when arriving directly from check); the next two lines move the byte count R5 into the top byte of R7 and store that many zero bytes with STXVL |
| SLD $56, R5, R7 |
| STXVL V0, R3, R7 |
| RET |
| #else |
| CMP R5, $4 |
| BLT next2 |
| MOVW R0, 0(R3) |
| ADD $4, R3 |
| ADD $-4, R5 |
| next2: |
| CMP R5, $2 |
| BLT next1 |
| MOVH R0, 0(R3) |
| ADD $2, R3 |
| ADD $-2, R5 |
| next1: |
| CMP R5, $0 |
| BC 12, 2, LR // return if R5 == 0 (beqlr); otherwise exactly one byte remains and is stored below |
| MOVB R0, 0(R3) |
| RET |
| #endif |
| |
| zero512xsetup: // large clear (n was >= 512 at entry): ptr is 8-byte aligned but may not be 128-byte aligned |
| ANDCC $8, R3, R11 // bit 3 of ptr set (8- but not 16-byte aligned)? |
| BEQ zero512setup16 |
| MOVD R0, 0(R3) // store one zero doubleword to reach 16-byte alignment |
| ADD $8, R3 // ptr += 8 |
| ADD $-8, R4 // n -= 8 |
| |
| zero512setup16: |
| ANDCC $127, R3, R14 // R14 = ptr & 127: offset of ptr within its 128-byte block |
| BEQ zero512setup // already 128-byte aligned: go to the DCBZ setup |
| MOVD $128, R15 |
| SUB R14, R15, R14 // R14 = 128 - R14: bytes needed to reach 128-byte alignment (a multiple of 16, since ptr is 16-byte aligned here) |
| SRD $4, R14, R15 // R15 = R14 / 16: number of 16-byte stores needed (1..7) |
| MOVD R15, CTR // CTR = number of 16-byte stores |
| XXLXOR VS32, VS32, VS32 // VS32 (V0) = 0: zero source for the vector stores below |
| |
| zero512preloop: // clear 16 bytes per iteration until ptr is 128-byte aligned |
| STXVD2X VS32, (R3+R0) // store 16 zero bytes at ptr |
| ADD $16, R3 // ptr += 16 |
| ADD $-16, R4 // n -= 16 |
| BDNZ zero512preloop |
| |
| zero512setup: // ptr is 128-byte aligned on every path that reaches here; set up the DCBZ loop |
| CMP R4, $512 // the alignment fix-up may have dropped n below 512; if so the next line skips the 512-byte loop |
| BLT remain |
| SRD $9, R4, R8 // R8 = n / 512: iterations of the 512-byte loop (R4 itself is reduced to n % 512 after the loop) |
| MOVD R8, CTR // CTR = number of 512-byte iterations |
| MOVD $128, R9 // R9, R10, R11 = byte offsets 128, 256, 384 for the indexed DCBZs |
| MOVD $256, R10 |
| MOVD $384, R11 |
| PCALIGN $16 |
| zero512: |
| DCBZ (R3+R0) // zero the cache block at ptr+0 (the code assumes 128-byte blocks) |
| DCBZ (R3+R9) // zero the block at ptr+128 |
| DCBZ (R3+R10) // zero the block at ptr+256 |
| DCBZ (R3+R11) // zero the block at ptr+384 |
| ADD $512, R3 |
| BDNZ zero512 |
| ANDCC $511, R4 |
| |
| remain: |
| CMP R4, $128 // at least one whole 128-byte block left? |
| BLT smaller |
| DCBZ (R3+R0) // zero one 128-byte block at ptr |
| ADD $128, R3 |
| ADD $-128, R4 |
| BR remain |
| |
| smaller: |
| ANDCC $127, R4, R7 // R7 = n & 127: bytes still to clear (n < 128 here); the next line finishes if it is zero |
| BEQ done |
| CMP R7, $64 // at least 64 bytes left? The result is consumed by the BLT two lines down, after VS32 is zeroed |
| XXLXOR VS32, VS32, VS32 |
| BLT lt64gt8 // fewer than 64 bytes left: finish with the 32/16/8-byte ladder and the byte tail |
| SRD $6, R7, R7 // R7 = R7 / 64 (1 here, since 64 <= R7 < 128): chunk count expected by zero64setup |
| BR zero64setup |
| |
| done: |
| RET |