| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build ppc64 ppc64le |
| |
| #include "go_asm.h" |
| #include "textflag.h" |
| |
| // Equal compares the a and b arguments for equality and stores the one-byte |
| // result at ret+48(FP). The arguments are read as a_base+0(FP), a_len+8(FP), |
| // b_base+24(FP) and b_len+32(FP). |
| TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49 |
| MOVD a_len+8(FP), R4 |
| MOVD b_len+32(FP), R5 |
| CMP R5, R4 // unequal lengths are not equal |
| BNE noteq |
| // Lengths match: set up memeqbody's register arguments. R5 already |
| // holds the common length; R4 is reused for the second pointer. |
| MOVD a_base+0(FP), R3 |
| MOVD b_base+24(FP), R4 |
| MOVD $ret+48(FP), R10 // memeqbody writes the result through R10 |
| BR memeqbody<>(SB) // tail call: memeqbody returns to our caller |
| |
| noteq: |
| MOVBZ $0,ret+48(FP) |
| RET |
| |
| // memequal(a, b unsafe.Pointer, size uintptr) bool |
| // |
| // Thin entry point: places the arguments in the registers memeqbody |
| // expects (R3, R4 = pointers, R5 = length, R10 = address of the result |
| // byte) and tail-calls it. |
| TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 |
| MOVD $ret+24(FP), R10 // R10 = &ret |
| MOVD size+16(FP), R5 // R5 = size |
| MOVD b+8(FP), R4 // R4 = b |
| MOVD a+0(FP), R3 // R3 = a |
| BR memeqbody<>(SB) |
| |
| // memequal_varlen(a, b unsafe.Pointer) bool |
| // |
| // The byte count is not passed on the stack; it is read from offset 8 of |
| // the closure addressed by R11. Identical pointers are reported equal |
| // without reading the size or the memory. |
| TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 |
| MOVD a+0(FP), R3 |
| MOVD b+8(FP), R4 |
| CMP R3, R4 |
| BNE compare // distinct pointers: contents must be compared |
| MOVD $1, R3 // same pointer: result is true |
| MOVB R3, ret+16(FP) |
| RET |
| compare: |
| MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure |
| MOVD $ret+16(FP), R10 // memeqbody writes the result through R10 |
| BR memeqbody<>(SB) |
| |
| // Do an efficient memequal for ppc64 |
| // |
| // Inputs: |
| // R3 = s1 |
| // R4 = s2 |
| // R5 = len |
| // R10 = addr of return value (byte) |
| // |
| // Stores 1 at (R10) if the R5 bytes at R3 and R4 are identical, 0 otherwise. |
| // Modifies R3, R4, R6, R7, R14, CTR, V0-V2 and the condition register. |
| // |
| // Strategy: lengths below 8 use the byte loop directly. Otherwise compare |
| // 32 bytes per iteration with VSX loads while whole 32-byte chunks remain, |
| // then 8 bytes per iteration, then the final len&7 bytes one at a time. |
| TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0 |
| MOVD R5,CTR // byte count for the byte loop when len < 8 |
| CMP R5,$8 // only optimize >=8 |
| BLT simplecheck |
| DCBT (R3) // cache hint |
| DCBT (R4) |
| CMP R5,$32 // optimize >= 32 |
| MOVD R5,R6 // needed if setup8a branch |
| BLT setup8a // 8 byte moves only |
| setup32a: // >= 32 bytes (no alignment is checked) |
| SRADCC $5,R5,R6 // number of 32 byte chunks to compare |
| MOVD R6,CTR |
| MOVD $16,R14 // index for the second VSX load of each chunk |
| loop32a: |
| LXVD2X (R3+R0), VS32 // VS32 = V0 |
| LXVD2X (R4+R0), VS33 // VS33 = V1 |
| VCMPEQUBCC V0, V1, V2 // compare, setting CR6 |
| BGE CR6, noteq // not all 16 bytes matched |
| LXVD2X (R3+R14), VS32 // second 16 bytes of the chunk |
| LXVD2X (R4+R14), VS33 |
| VCMPEQUBCC V0, V1, V2 |
| BGE CR6, noteq |
| ADD $32,R3 // bump up to next 32 |
| ADD $32,R4 |
| BC 16, 0, loop32a // decrement CTR, loop while CTR != 0 (CR not tested) |
| ANDCC $24,R5,R6 // Any 8 byte chunks? |
| BEQ leftover // and result is 0 |
| setup8a: |
| SRADCC $3,R6,R6 // get the 8 byte count |
| BEQ leftover // shifted value is 0 |
| MOVD R6,CTR |
| loop8: |
| MOVD 0(R3),R6 // doublewords to compare |
| ADD $8,R3 |
| MOVD 0(R4),R7 |
| ADD $8,R4 |
| CMP R6,R7 // match? |
| BC 8,2,loop8 // decrement CTR, loop while CTR != 0 and CR0 EQ |
| BNE noteq // fell out of the loop on a mismatch |
| leftover: |
| ANDCC $7,R5,R6 // check for leftover bytes |
| BEQ equal |
| MOVD R6,CTR |
| BR simple |
| simplecheck: |
| CMP R5,$0 |
| BEQ equal |
| simple: // byte loop, CTR = bytes remaining (> 0) |
| MOVBZ 0(R3), R6 |
| ADD $1,R3 |
| MOVBZ 0(R4), R7 |
| ADD $1,R4 |
| CMP R6, R7 |
| BNE noteq |
| BC 8,2,simple // decrement CTR, loop while CTR != 0 (EQ always holds here) |
| BR equal // CTR exhausted without a mismatch |
| noteq: |
| MOVB $0, (R10) |
| RET |
| equal: |
| MOVD $1, R3 |
| MOVB R3, (R10) |
| RET |
| |