| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "go_asm.h" |
| #include "textflag.h" |
| |
| // Index: entry stub. Places the operands in the registers indexbody<> expects |
| // (R1/R2 = s pointer/length, R3/R4 = sep pointer/length, R5 = address of the |
| // result slot) and branches there; indexbody<> stores the result and returns. |
| // The vx facility must already have been confirmed available by the caller. |
| TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56 |
| MOVD $ret+48(FP), R5 // R5=&ret |
| MOVD a_base+0(FP), R1 // R1=&s[0] |
| MOVD a_len+8(FP), R2 // R2=len(s) |
| MOVD b_base+24(FP), R3 // R3=&sep[0] |
| MOVD b_len+32(FP), R4 // R4=len(sep) |
| BR indexbody<>(SB) |
| |
| // IndexString: entry stub for operands that are two words each (pointer, length). |
| // Loads R1/R2 = s pointer/length, R3/R4 = sep pointer/length, R5 = address of the |
| // result slot, then branches to indexbody<>, which stores the result and returns. |
| // The vx facility must already have been confirmed available by the caller. |
| TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40 |
| MOVD $ret+32(FP), R5 // R5=&ret |
| MOVD a_base+0(FP), R1 // R1=&s[0] |
| MOVD a_len+8(FP), R2 // R2=len(s) |
| MOVD b_base+16(FP), R3 // R3=&sep[0] |
| MOVD b_len+24(FP), R4 // R4=len(sep) |
| BR indexbody<>(SB) |
| |
| // s: string we are searching |
| // sep: string to search for |
| // R1=&s[0], R2=len(s) |
| // R3=&sep[0], R4=len(sep) |
| // R5=&ret (int) |
| // Caller must confirm availability of vx facility before calling. |
//
// Result: the byte offset of the first match of sep within s is stored to (R5).
// -1 is stored when len(sep) > len(s), when len(sep) == 0, or when no match exists.
// len(sep) > 64 is not implemented (see index65plus).
//
// Registers as used below:
//   R2      rewritten to &s[len(s)-len(sep)], the last address a match may start at
//   R4      rewritten to len(sep)-1 (VLL index)
//   R7      address of the candidate position currently being tested
//   R0      VLL index for the trailing partial chunk (len(sep) 3, 4 and 17..64 paths)
//   R8, R9  scratch
//   V0      leading bytes of sep
//
// Dispatch on len(sep):
//   2, 3, 4   dedicated loops that test 16 start positions per iteration
//   1, 5..16  index2to16, one position at a time (also finishes the short tails
//             left over by the 2/3/4 loops)
//   17..64    one position at a time, comparing 2, 3 or 4 vector chunks
| TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0 |
// sep longer than s: nothing can match.
| CMPBGT R4, R2, notfound |
| ADD R1, R2 |
| SUB R4, R2 // R2=&s[len(s)-len(sep)] (last valid index) |
// An empty sep is reported as not found.
| CMPBEQ R4, $0, notfound |
| SUB $1, R4 // R4=len(sep)-1 for use as VLL index |
| VLL R4, (R3), V0 // contains first 16 bytes of sep |
// R7 is the scan cursor; it starts at &s[0].
| MOVD R1, R7 |
| index2plus: |
// len(sep)==2 (R4==1). If fewer than 16 start positions are available
// (R7+15 >= R2) go straight to the one-position-at-a-time loop.
| CMPBNE R4, $1, index3plus |
| MOVD $15(R7), R9 |
| CMPBGE R9, R2, index2to16 |
| VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00... |
// V16 = all ones: the byte value VFEEBS searches for in the match mask.
| VONE V16 |
// V1 = halfword 0 of V0 (the two bytes of sep) replicated across the vector.
| VREPH $0, V0, V1 |
// NOTE(review): same R9/R2 test as before the vector setup; neither register
// changed in between, so this check looks redundant.
| CMPBGE R9, R2, index2to16 |
| index2loop: |
| VL 0(R7), V2 // 16 bytes, even indices |
| VL 1(R7), V4 // 16 bytes, odd indices |
| VCEQH V1, V2, V5 // compare even indices |
| VCEQH V1, V4, V6 // compare odd indices |
// After the merge, byte i of V7 is 0xff when sep matches at R7+i.
| VSEL V5, V6, V31, V7 // merge even and odd indices |
| VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found |
| BLT foundV17 |
| MOVD $16(R7), R7 // R7+=16 |
| ADD $15, R7, R9 |
| CMPBLE R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search) |
// Fewer than 16 positions remain: finish them in the generic loop, if any are left.
| CMPBLE R7, R2, index2to16 |
| BR notfound |
| |
| index3plus: |
// len(sep)==3 (R4==2): same scheme as above plus a byte compare for sep[2].
| CMPBNE R4, $2, index4plus |
| ADD $15, R7, R9 |
| CMPBGE R9, R2, index2to16 |
// R0=1 is the VLL index used in the loop to fetch the 2 bytes following each 16-byte window.
| MOVD $1, R0 |
| VGBM $0xaaaa, V31 // 0xff00ff00ff00ff00... |
| VONE V16 |
// V1 = first two bytes of sep in every halfword; V8 = sep[2] in every byte.
| VREPH $0, V0, V1 |
| VREPB $2, V0, V8 |
| index3loop: |
| VL (R7), V2 // load 16-bytes into V2 |
| VLL R0, 16(R7), V3 // load 2-bytes into V3 |
| VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1 |
| VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2 |
| VCEQH V1, V2, V5 // compare 2-byte even indices |
| VCEQH V1, V4, V6 // compare 2-byte odd indices |
| VCEQB V8, V9, V10 // compare last bytes |
| VSEL V5, V6, V31, V7 // merge even and odd indices |
| VN V7, V10, V7 // AND indices with last byte |
| VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found |
| BLT foundV17 |
| MOVD $16(R7), R7 // R7+=16 |
| ADD $15, R7, R9 |
| CMPBLE R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search) |
// Fewer than 16 positions remain: finish them in the generic loop, if any are left.
| CMPBLE R7, R2, index2to16 |
| BR notfound |
| |
| index4plus: |
// len(sep)==4 (R4==3): word compares at byte shifts 0..3, merged into one byte mask.
| CMPBNE R4, $3, index5plus |
| ADD $15, R7, R9 |
| CMPBGE R9, R2, index2to16 |
// R0=2 is the VLL index used in the loop to fetch the 3 bytes following each 16-byte window.
| MOVD $2, R0 |
| VGBM $0x8888, V29 // 0xff000000ff000000... |
| VGBM $0x2222, V30 // 0x0000ff000000ff00... |
| VGBM $0xcccc, V31 // 0xffff0000ffff0000... |
| VONE V16 |
// V1 = word 0 of V0 (the four bytes of sep) replicated across the vector.
| VREPF $0, V0, V1 |
| index4loop: |
| VL (R7), V2 // load 16-bytes into V2 |
| VLL R0, 16(R7), V3 // load 3-bytes into V3 |
| VSLDB $1, V2, V3, V4 // V4=(V2:V3)<<1 |
| VSLDB $2, V2, V3, V9 // V9=(V2:V3)<<2 |
| VSLDB $3, V2, V3, V10 // V10=(V2:V3)<<3 |
| VCEQF V1, V2, V5 // compare index 0, 4, ... |
| VCEQF V1, V4, V6 // compare index 1, 5, ... |
| VCEQF V1, V9, V11 // compare index 2, 6, ... |
| VCEQF V1, V10, V12 // compare index 3, 7, ... |
// The three selects leave byte i of V7 taken from the compare done at shift (i mod 4).
| VSEL V5, V6, V29, V13 // merge index 0, 1, 4, 5, ... |
| VSEL V11, V12, V30, V14 // merge index 2, 3, 6, 7, ... |
| VSEL V13, V14, V31, V7 // final merge |
| VFEEBS V16, V7, V17 // find leftmost index, set condition to 1 if found |
| BLT foundV17 |
| MOVD $16(R7), R7 // R7+=16 |
| ADD $15, R7, R9 |
| CMPBLE R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search) |
// Fewer than 16 positions remain: finish them in the generic loop, if any are left.
| CMPBLE R7, R2, index2to16 |
| BR notfound |
| |
| index5plus: |
| CMPBGT R4, $15, index17plus |
| index2to16: |
// Generic loop for len(sep) <= 16: load len(sep) bytes of s at the cursor with
// the same VLL index that produced V0 and compare the whole vector against V0.
// With only one position left (R7+1 > R2) skip directly to the tail.
| CMPBGT R7, R2, notfound |
| MOVD $1(R7), R8 |
| CMPBGT R8, R2, index2to16tail |
| index2to16loop: |
| // unrolled 2x: tests positions R7 and R7+1 per iteration; both loads are issued up front |
| VLL R4, (R7), V1 |
| VLL R4, 1(R7), V2 |
| VCEQGS V0, V1, V3 |
| BEQ found |
| MOVD $1(R7), R7 |
| VCEQGS V0, V2, V4 |
| BEQ found |
| MOVD $1(R7), R7 |
// R7 < R2: at least two positions remain. R7 > R2: done. R7 == R2: fall into the tail.
| CMPBLT R7, R2, index2to16loop |
| CMPBGT R7, R2, notfound |
| index2to16tail: |
// Exactly one candidate position remains.
| VLL R4, (R7), V1 |
| VCEQGS V0, V1, V2 |
| BEQ found |
| BR notfound |
| |
| index17plus: |
// len(sep) 17..32: V0 holds the first 16 bytes of sep, V1 the remaining len(sep)-16
// (R0 = len(sep)-17 is the VLL index for that tail). One position per iteration.
| CMPBGT R4, $31, index33plus |
| SUB $16, R4, R0 |
| VLL R0, 16(R3), V1 |
// V7 = all ones, the reference value for "every chunk compared equal".
| VONE V7 |
| index17to32loop: |
| VL (R7), V2 |
| VLL R0, 16(R7), V3 |
| VCEQG V0, V2, V4 |
| VCEQG V1, V3, V5 |
// AND the per-chunk results, then test the combination against all ones.
| VN V4, V5, V6 |
| VCEQGS V6, V7, V8 |
| BEQ found |
| MOVD $1(R7), R7 |
| CMPBLE R7, R2, index17to32loop |
| BR notfound |
| |
| index33plus: |
// len(sep) 33..48: three chunks (16 + 16 + tail of len(sep)-32 bytes), same scheme.
| CMPBGT R4, $47, index49plus |
| SUB $32, R4, R0 |
| VL 16(R3), V1 |
| VLL R0, 32(R3), V2 |
| VONE V11 |
| index33to48loop: |
| VL (R7), V3 |
| VL 16(R7), V4 |
| VLL R0, 32(R7), V5 |
| VCEQG V0, V3, V6 |
| VCEQG V1, V4, V7 |
| VCEQG V2, V5, V8 |
| VN V6, V7, V9 |
| VN V8, V9, V10 |
| VCEQGS V10, V11, V12 |
| BEQ found |
| MOVD $1(R7), R7 |
| CMPBLE R7, R2, index33to48loop |
| BR notfound |
| |
| index49plus: |
// len(sep) 49..64: four chunks (16 + 16 + 16 + tail of len(sep)-48 bytes), same scheme.
| CMPBGT R4, $63, index65plus |
| SUB $48, R4, R0 |
| VL 16(R3), V1 |
| VL 32(R3), V2 |
| VLL R0, 48(R3), V3 |
| VONE V15 |
| index49to64loop: |
| VL (R7), V4 |
| VL 16(R7), V5 |
| VL 32(R7), V6 |
| VLL R0, 48(R7), V7 |
| VCEQG V0, V4, V8 |
| VCEQG V1, V5, V9 |
| VCEQG V2, V6, V10 |
| VCEQG V3, V7, V11 |
| VN V8, V9, V12 |
| VN V10, V11, V13 |
| VN V12, V13, V14 |
| VCEQGS V14, V15, V16 |
| BEQ found |
| MOVD $1(R7), R7 |
// Falls through into notfound once the cursor passes R2.
| CMPBLE R7, R2, index49to64loop |
| notfound: |
// Store -1 as the result.
| MOVD $-1, (R5) |
| RET |
| |
| index65plus: |
| // not implemented (len(sep) > 64). NOTE(review): the store through R0 below looks like a deliberate fault - confirm. |
| MOVD $0, (R0) |
| RET |
| |
| foundV17: // index is in doubleword V17[0]; add it to the window base in R7 |
| VLGVG $0, V17, R8 |
| ADD R8, R7 |
| found: |
// R7 is the address of the match; store its offset from &s[0] as the result.
| SUB R1, R7 |
| MOVD R7, (R5) |
| RET |