| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "textflag.h" |
| |
| // See memclrNoHeapPointers Go doc for important implementation constraints. |
| |
// memclrNoHeapPointers zeroes n bytes starting at ptr.
//
// Register use throughout the routine:
//   R4 = address of the next byte to clear (initially ptr)
//   R5 = number of bytes still to clear (initially n)
//   V1 = 16-byte vector of zeros (only in the 32..255 byte paths)
//
// Strategy, selected by the remaining byte count:
//   >= 4096   : unrolled loop of sixteen 256-byte XC operations per pass
//   256..4095 : loop of single 256-byte XC operations
//   32..255   : vector stores covering the head and the tail of the range
//   0..31     : 16-byte steps with MOVD, then an exact-size tail of
//               MOVD/MOVW/MOVH/MOVB stores
| // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) |
| TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16 |
| MOVD ptr+0(FP), R4 |
| MOVD n+8(FP), R5 |
| |
// Sizes of 32 bytes or more go to the XC / vector paths.
| CMPBGE R5, $32, clearge32 |
| |
// start handles fewer than 32 remaining bytes. It is also re-entered from
// clear32to255 when a 256-byte loop leaves a remainder below 32.
// Counts of 0..15 are dispatched to an exact-size tail; otherwise 16 bytes
// are cleared with two 8-byte stores and the dispatch is repeated.
| start: |
| CMPBLE R5, $3, clear0to3 |
| CMPBLE R5, $7, clear4to7 |
| CMPBLE R5, $11, clear8to11 |
| CMPBLE R5, $15, clear12to15 |
| MOVD $0, 0(R4) |
| MOVD $0, 8(R4) |
| ADD $16, R4 |
| SUB $16, R5 |
| BR start |
| |
// Tail for 0..3 bytes: nothing, 1, 2, or 2+1 bytes.
| clear0to3: |
| CMPBEQ R5, $0, done |
| CMPBNE R5, $1, clear2 |
| MOVB $0, 0(R4) |
| RET |
| clear2: |
| CMPBNE R5, $2, clear3 |
| MOVH $0, 0(R4) |
| RET |
| clear3: |
| MOVH $0, 0(R4) |
| MOVB $0, 2(R4) |
| RET |
| |
// Tail for 4..7 bytes: a 4-byte store followed by 0, 1, 2, or 2+1 bytes.
| clear4to7: |
| CMPBNE R5, $4, clear5 |
| MOVW $0, 0(R4) |
| RET |
| clear5: |
| CMPBNE R5, $5, clear6 |
| MOVW $0, 0(R4) |
| MOVB $0, 4(R4) |
| RET |
| clear6: |
| CMPBNE R5, $6, clear7 |
| MOVW $0, 0(R4) |
| MOVH $0, 4(R4) |
| RET |
| clear7: |
| MOVW $0, 0(R4) |
| MOVH $0, 4(R4) |
| MOVB $0, 6(R4) |
| RET |
| |
// Tail for 8..11 bytes: an 8-byte store followed by 0, 1, 2, or 2+1 bytes.
| clear8to11: |
| CMPBNE R5, $8, clear9 |
| MOVD $0, 0(R4) |
| RET |
| clear9: |
| CMPBNE R5, $9, clear10 |
| MOVD $0, 0(R4) |
| MOVB $0, 8(R4) |
| RET |
| clear10: |
| CMPBNE R5, $10, clear11 |
| MOVD $0, 0(R4) |
| MOVH $0, 8(R4) |
| RET |
| clear11: |
| MOVD $0, 0(R4) |
| MOVH $0, 8(R4) |
| MOVB $0, 10(R4) |
| RET |
| |
// Tail for 12..15 bytes: an 8-byte and a 4-byte store followed by
// 0, 1, 2, or 2+1 bytes.
| clear12to15: |
| CMPBNE R5, $12, clear13 |
| MOVD $0, 0(R4) |
| MOVW $0, 8(R4) |
| RET |
| clear13: |
| CMPBNE R5, $13, clear14 |
| MOVD $0, 0(R4) |
| MOVW $0, 8(R4) |
| MOVB $0, 12(R4) |
| RET |
| clear14: |
| CMPBNE R5, $14, clear15 |
| MOVD $0, 0(R4) |
| MOVW $0, 8(R4) |
| MOVH $0, 12(R4) |
| RET |
| clear15: |
| MOVD $0, 0(R4) |
| MOVW $0, 8(R4) |
| MOVH $0, 12(R4) |
| MOVB $0, 14(R4) |
| RET |
| |
// n >= 32: sizes below 4096 skip the unrolled loop.
| clearge32: |
| CMP R5, $4096 |
| BLT clear256Bto4KB |
| |
// Each XC below exclusive-ORs the 256 bytes at 0(R4) with themselves
// (source and destination operands are the same), leaving them zero; R4 is
// then advanced and R5 reduced by 256. The pass repeats while at least
// 4096 bytes remain.
| // For size >= 4KB, XC is loop unrolled 16 times (4KB = 256B * 16) |
| clearge4KB: |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| CMP R5, $4096 |
| BGE clearge4KB |
| |
// Fewer than 4096 bytes remain: clear 256 bytes per iteration until fewer
// than 256 are left.
| clear256Bto4KB: |
| CMP R5, $256 |
| BLT clear32to255 |
| XC $256, 0(R4), 0(R4) |
| ADD $256, R4 |
| ADD $-256, R5 |
| BR clear256Bto4KB |
| |
// Dispatch on the remaining count. This label is reached only from the BLT
// in clear256Bto4KB, so R5 is in 0..255 here: 0 returns, 1..31 goes back to
// start, and the rest pick a vector path. Because R5 <= 255, the final BLE
// is always taken and the fall-through into clear32 is not reached with a
// larger count.
| clear32to255: |
| CMPBEQ R5, $0, done |
| CMPBLT R5, $32, start |
| CMPBEQ R5, $32, clear32 |
| CMPBLE R5, $64, clear33to64 |
| CMP R5, $128 |
| BLE clear65to128 |
| CMP R5, $255 |
| BLE clear129to255 |
| |
// Exactly 32 bytes: two 16-byte vector stores of zero.
| clear32: |
| VZERO V1 |
| VST V1, 0(R4) |
| VST V1, 16(R4) |
| RET |
| |
// 33..64 bytes: clear the first 32 bytes, then set R5 to (count - 32) and
// use it as an index so the second pair of stores covers the last 32 bytes.
// The two ranges overlap when the count is below 64.
| clear33to64: |
| VZERO V1 |
| VST V1, 0(R4) |
| VST V1, 16(R4) |
| ADD $-32, R5 |
| VST V1, 0(R4)(R5) |
| VST V1, 16(R4)(R5) |
| RET |
| |
// 65..128 bytes: same head/tail scheme with 64 bytes at each end.
| clear65to128: |
| VZERO V1 |
| VST V1, 0(R4) |
| VST V1, 16(R4) |
| VST V1, 32(R4) |
| VST V1, 48(R4) |
| ADD $-64, R5 |
| VST V1, 0(R4)(R5) |
| VST V1, 16(R4)(R5) |
| VST V1, 32(R4)(R5) |
| VST V1, 48(R4)(R5) |
| RET |
| |
// 129..255 bytes: same head/tail scheme with 128 bytes at each end.
| clear129to255: |
| VZERO V1 |
| VST V1, 0(R4) |
| VST V1, 16(R4) |
| VST V1, 32(R4) |
| VST V1, 48(R4) |
| VST V1, 64(R4) |
| VST V1, 80(R4) |
| VST V1, 96(R4) |
| VST V1, 112(R4) |
| ADD $-128, R5 |
| VST V1, 0(R4)(R5) |
| VST V1, 16(R4)(R5) |
| VST V1, 32(R4)(R5) |
| VST V1, 48(R4)(R5) |
| VST V1, 64(R4)(R5) |
| VST V1, 80(R4)(R5) |
| VST V1, 96(R4)(R5) |
| VST V1, 112(R4)(R5) |
| RET |
| |
// Nothing left to clear.
| done: |
| RET |
| |