| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ppc64 || ppc64le |
| |
| #include "go_asm.h" |
| #include "textflag.h" |
| |
| // Helper names for x-form loads in BE ordering. |
| #ifdef GOARCH_ppc64le |
| #define _LDBEX MOVDBR |
| #define _LWBEX MOVWBR |
| #define _LHBEX MOVHBR |
| #else |
| #define _LDBEX MOVD |
| #define _LWBEX MOVW |
| #define _LHBEX MOVH |
| #endif |
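| |
| // On little-endian, the byte-reversed loads above produce the same |
| // big-endian byte order that plain loads give on big-endian: the first |
| // memory byte lands in the most significant position, so an unsigned |
| // compare of two loaded values is a lexicographic compare of the bytes. |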
| |
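| // The SETB macros turn the outcome of a compare held in a CR field |
| // into an integer: -1 for less than, 0 for equal, 1 for greater than. |
| // For example, after CMP R4,R7,CR0, SETB_CR0(R3) leaves -1, 0, or 1 |
| // in R3 according to whether R4 <, ==, or > R7. |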
| #ifdef GOPPC64_power9 |
| #define SETB_CR0(rout) SETB CR0, rout |
| #define SETB_CR1(rout) SETB CR1, rout |
| #define SETB_INIT() |
| #define SETB_CR0_NE(rout) SETB_CR0(rout) |
| #else |
| // A helper macro to emulate SETB on P8. This assumes |
| // -1 is in R20 and 1 is in R21. crxlt and crxeq must |
| // be bits of the same CR field. |
| #define _SETB(crxlt, crxeq, rout) \ |
| ISEL crxeq,R0,R21,rout \ |
| ISEL crxlt,R20,rout,rout |
| |
| // A special case when it is known the comparison |
| // will always be not equal. The result must be -1 or 1. |
| #define SETB_CR0_NE(rout) \ |
| ISEL CR0LT,R20,R21,rout |
| |
| #define SETB_CR0(rout) _SETB(CR0LT, CR0EQ, rout) |
| #define SETB_CR1(rout) _SETB(CR1LT, CR1EQ, rout) |
| #define SETB_INIT() \ |
| MOVD $-1,R20 \ |
| MOVD $1,R21 |
| #endif |
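| |
| // Compare and cmpstring return -1, 0, or 1 ordering a and b |
| // lexicographically: the result of comparing the first |
| // min(len(a),len(b)) bytes, or, if those are equal, of comparing the lengths. |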
| |
| TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 |
| // incoming: |
| // R3 a addr |
| // R4 a len |
| // R6 b addr |
| // R7 b len |
| // |
| // on entry to cmpbody: |
| // R3 return value if len(a) == len(b) |
| // R5 a addr |
| // R6 b addr |
| // R9 min(len(a),len(b)) |
| SETB_INIT() |
| MOVD R3,R5 |
| CMP R4,R7,CR0 |
| CMP R3,R6,CR7 |
| ISEL CR0LT,R4,R7,R9 |
| SETB_CR0(R3) |
| BC $12,30,LR // beqlr cr7 |
| BR cmpbody<>(SB) |
| |
| TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 |
| // incoming: |
| // R3 a addr -> R5 |
| // R4 a len -> R3 |
| // R5 b addr -> R6 |
| // R6 b len -> R4 |
| // |
| // on entry to cmpbody: |
| // R3 return value if len(a) == len(b) |
| // R5 a addr |
| // R6 b addr |
| // R9 min(len(a),len(b)) |
| SETB_INIT() |
| CMP R4,R6,CR0 |
| CMP R3,R5,CR7 |
| ISEL CR0LT,R4,R6,R9 |
| MOVD R5,R6 |
| MOVD R3,R5 |
| SETB_CR0(R3) |
| BC $12,30,LR // beqlr cr7 |
| BR cmpbody<>(SB) |
| |
| #ifdef GOARCH_ppc64le |
| DATA byteswap<>+0(SB)/8, $0x0706050403020100 |
| DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908 |
| GLOBL byteswap<>+0(SB), RODATA, $16 |
| #define SWAP V21 |
| #endif |
| |
| TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 |
| start: |
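| // Dispatch on the minimum length: <16B branches to cmp8, 16-31B to |
| // cmp16, 32-63B to cmp32, and >=64B falls through to cmp64. |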
| CMP R9,$16,CR0 |
| CMP R9,$32,CR1 |
| CMP R9,$64,CR2 |
| MOVD $16,R10 // R10 = 16, used as a vector load offset below |
| BLT cmp8 |
| BLT CR1,cmp16 |
| BLT CR2,cmp32 |
| |
| cmp64: // >= 64B |
| DCBT (R5) // optimize for size>=64 |
| DCBT (R6) // cache hint |
| |
| SRD $6,R9,R14 // There is at least one iteration. |
| MOVD R14,CTR |
| ANDCC $63,R9,R9 |
| CMP R9,$16,CR1 // Do setup for tail check early on. |
| CMP R9,$32,CR2 |
| CMP R9,$48,CR3 |
| ADD $-16,R9,R9 |
| |
| MOVD $32,R11 // set offset 32 for vector loads |
| MOVD $48,R12 // set offset 48 for vector loads |
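| |
| // Each loop iteration compares 64 bytes using four 16-byte vector |
| // loads per operand. VCMPEQUDCC sets CR6, and BGE CR6 branches to |
| // "different" as soon as a 16-byte chunk is not entirely equal. |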
| |
| PCALIGN $16 |
| cmp64_loop: |
| LXVD2X (R5)(R0),V3 // load bytes of A at offset 0 into vector |
| LXVD2X (R6)(R0),V4 // load bytes of B at offset 0 into vector |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different // jump out if they differ |
| |
| LXVD2X (R5)(R10),V3 // load bytes of A at offset 16 into vector |
| LXVD2X (R6)(R10),V4 // load bytes of B at offset 16 into vector |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R5)(R11),V3 // load bytes of A at offset 32 into vector |
| LXVD2X (R6)(R11),V4 // load bytes of B at offset 32 into vector |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R5)(R12),V3 // load bytes of A at offset 48 into vector |
| LXVD2X (R6)(R12),V4 // load bytes of B at offset 48 into vector |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| ADD $64,R5,R5 // increment to next 64 bytes of A |
| ADD $64,R6,R6 // increment to next 64 bytes of B |
| BDNZ cmp64_loop |
| BC $12,2,LR // beqlr |
| |
| // Finish out the tail with minimal overlapped checking: |
| // R9 holds (tail length - 16), so the final 16B compare at |
| // offset R9 may re-read bytes already verified equal. |
| // Note, a 0 tail is handled by the beqlr above. |
| BLE CR1,cmp64_tail_gt0 |
| BLE CR2,cmp64_tail_gt16 |
| BLE CR3,cmp64_tail_gt32 |
| |
| cmp64_tail_gt48: // 49 - 63B |
| LXVD2X (R0)(R5),V3 |
| LXVD2X (R0)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R5)(R10),V3 |
| LXVD2X (R6)(R10),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R5)(R11),V3 |
| LXVD2X (R6)(R11),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| BR cmp64_tail_gt0 |
| |
| PCALIGN $16 |
| cmp64_tail_gt32: // 33 - 48B |
| LXVD2X (R0)(R5),V3 |
| LXVD2X (R0)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R5)(R10),V3 |
| LXVD2X (R6)(R10),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| BR cmp64_tail_gt0 |
| |
| PCALIGN $16 |
| cmp64_tail_gt16: // 17 - 32B |
| LXVD2X (R0)(R5),V3 |
| LXVD2X (R0)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| BR cmp64_tail_gt0 |
| |
| PCALIGN $16 |
| cmp64_tail_gt0: // 1 - 16B |
| LXVD2X (R5)(R9),V3 |
| LXVD2X (R6)(R9),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| RET |
| |
| PCALIGN $16 |
| cmp32: // 32 - 63B |
| ANDCC $31,R9,R9 |
| |
| LXVD2X (R0)(R5),V3 |
| LXVD2X (R0)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R10)(R5),V3 |
| LXVD2X (R10)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| BC $12,2,LR // beqlr |
| ADD R9,R10,R10 |
| |
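| // R9 = len&31 (nonzero here). Recheck the last 32 bytes at offsets |
| // R9 and R9+16; they overlap bytes already verified equal. |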
| LXVD2X (R9)(R5),V3 |
| LXVD2X (R9)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| |
| LXVD2X (R10)(R5),V3 |
| LXVD2X (R10)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| RET |
| |
| PCALIGN $16 |
| cmp16: // 16 - 31B |
| ANDCC $15,R9,R9 |
| LXVD2X (R0)(R5),V3 |
| LXVD2X (R0)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| BC $12,2,LR // beqlr |
| |
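| // R9 = len&15 (nonzero here). Recheck the last 16 bytes at offset R9, |
| // overlapping bytes already verified equal. |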
| LXVD2X (R9)(R5),V3 |
| LXVD2X (R9)(R6),V4 |
| VCMPEQUDCC V3,V4,V1 |
| BGE CR6,different |
| RET |
| |
| PCALIGN $16 |
| different: |
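| // The first mismatching 16-byte chunks are in V3 (a) and V4 (b). |
| // On little-endian, permute them to big-endian byte order so the |
| // unsigned doubleword compares below reflect memory order. |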
| #ifdef GOARCH_ppc64le |
| MOVD $byteswap<>+00(SB),R16 |
| LXVD2X (R16)(R0),SWAP // load the byte-swap permute mask |
| |
| VPERM V3,V3,SWAP,V3 |
| VPERM V4,V4,SWAP,V4 |
| #endif |
| |
| MFVSRD VS35,R16 // move upper doublewords of A and B into GPR for comparison |
| MFVSRD VS36,R10 |
| |
| CMPU R16,R10 |
| BEQ lower |
| SETB_CR0_NE(R3) |
| RET |
| |
| PCALIGN $16 |
| lower: |
| VSLDOI $8,V3,V3,V3 // move lower doublewords of A and B into GPR for comparison |
| MFVSRD VS35,R16 |
| VSLDOI $8,V4,V4,V4 |
| MFVSRD VS36,R10 |
| |
| CMPU R16,R10 |
| SETB_CR0_NE(R3) |
| RET |
| |
| PCALIGN $16 |
| cmp8: // 8 - 15B (0 - 15B if GOPPC64_power10) |
| #ifdef GOPPC64_power10 |
| SLD $56,R9,R9 // LXVLL takes the byte count in the most significant byte of R9. |
| LXVLL R5,R9,V3 // Load bytes starting from MSB to LSB, unused are zero filled. |
| LXVLL R6,R9,V4 |
| VCMPUQ V3,V4,CR0 // Compare as a 128b integer. |
| SETB_CR0(R6) |
| ISEL CR0EQ,R3,R6,R3 // If equal, length determines the return value. |
| RET |
| #else |
| CMP R9,$8 |
| BLT cmp4 |
| ANDCC $7,R9,R9 |
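| // R9 = len-8: the second pair of loads reads the last 8 bytes, |
| // overlapping the first pair (fully when len == 8). |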
| _LDBEX (R0)(R5),R10 |
| _LDBEX (R0)(R6),R11 |
| _LDBEX (R9)(R5),R12 |
| _LDBEX (R9)(R6),R14 |
| CMPU R10,R11,CR0 |
| SETB_CR0(R5) |
| CMPU R12,R14,CR1 |
| SETB_CR1(R6) |
| CRAND CR0EQ,CR1EQ,CR1EQ // If both equal, length determines return value. |
| ISEL CR0EQ,R6,R5,R4 |
| ISEL CR1EQ,R3,R4,R3 |
| RET |
| |
| PCALIGN $16 |
| cmp4: // 4 - 7B |
| CMP R9,$4 |
| BLT cmp2 |
| ANDCC $3,R9,R9 |
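| // R9 = len-4: load the first and last 4 bytes in BE order, pack each |
| // operand's pair into one 64-bit value with RLDIMI, so a single |
| // unsigned compare settles both words. |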
| _LWBEX (R0)(R5),R10 |
| _LWBEX (R0)(R6),R11 |
| _LWBEX (R9)(R5),R12 |
| _LWBEX (R9)(R6),R14 |
| RLDIMI $32,R10,$0,R12 |
| RLDIMI $32,R11,$0,R14 |
| CMPU R12,R14 |
| BR cmp0 |
| |
| PCALIGN $16 |
| cmp2: // 2 - 3B |
| CMP R9,$2 |
| BLT cmp1 |
| ANDCC $1,R9,R9 |
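| // R9 = len-2: same packing trick as cmp4, using the first and last halfwords. |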
| _LHBEX (R0)(R5),R10 |
| _LHBEX (R0)(R6),R11 |
| _LHBEX (R9)(R5),R12 |
| _LHBEX (R9)(R6),R14 |
| RLDIMI $32,R10,$0,R12 |
| RLDIMI $32,R11,$0,R14 |
| CMPU R12,R14 |
| BR cmp0 |
| |
| PCALIGN $16 |
| cmp1: |
| CMP R9,$0 |
| BEQ cmp0 |
| MOVBZ (R5),R10 |
| MOVBZ (R6),R11 |
| CMPU R10,R11 |
| cmp0: |
| SETB_CR0(R6) |
| ISEL CR0EQ,R3,R6,R3 |
| RET |
| #endif |