| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "go_asm.h" |
| #include "textflag.h" |
| |
| TEXT ·Index(SB),NOSPLIT,$0-56 |
| MOVD a_base+0(FP), R0 |
| MOVD a_len+8(FP), R1 |
| MOVD b_base+24(FP), R2 |
| MOVD b_len+32(FP), R3 |
| MOVD $ret+48(FP), R9 |
| B indexbody<>(SB) |
| |
| TEXT ·IndexString(SB),NOSPLIT,$0-40 |
| MOVD a_base+0(FP), R0 |
| MOVD a_len+8(FP), R1 |
| MOVD b_base+16(FP), R2 |
| MOVD b_len+24(FP), R3 |
| MOVD $ret+32(FP), R9 |
| B indexbody<>(SB) |
| |
| // input: |
| // R0: haystack |
| // R1: length of haystack |
| // R2: needle |
| // R3: length of needle (2 <= len <= 32) |
| // R9: address to put result |
| TEXT indexbody<>(SB),NOSPLIT,$0-56 |
| // main idea is to load 'sep' into separate register(s) |
| // to avoid repeatedly re-load it again and again |
| // for sebsequent substring comparisons |
| SUB R3, R1, R4 |
| // R4 contains the start of last substring for comparison |
| ADD R0, R4, R4 |
| ADD $1, R0, R8 |
| |
| CMP $8, R3 |
| BHI greater_8 |
| TBZ $3, R3, len_2_7 |
| len_8: |
| // R5 contains 8-byte of sep |
| MOVD (R2), R5 |
| loop_8: |
| // R6 contains substring for comparison |
| CMP R4, R0 |
| BHI not_found |
| MOVD.P 1(R0), R6 |
| CMP R5, R6 |
| BNE loop_8 |
| B found |
| len_2_7: |
| TBZ $2, R3, len_2_3 |
| TBZ $1, R3, len_4_5 |
| TBZ $0, R3, len_6 |
| len_7: |
| // R5 and R6 contain 7-byte of sep |
| MOVWU (R2), R5 |
| // 1-byte overlap with R5 |
| MOVWU 3(R2), R6 |
| loop_7: |
| CMP R4, R0 |
| BHI not_found |
| MOVWU.P 1(R0), R3 |
| CMP R5, R3 |
| BNE loop_7 |
| MOVWU 2(R0), R3 |
| CMP R6, R3 |
| BNE loop_7 |
| B found |
| len_6: |
| // R5 and R6 contain 6-byte of sep |
| MOVWU (R2), R5 |
| MOVHU 4(R2), R6 |
| loop_6: |
| CMP R4, R0 |
| BHI not_found |
| MOVWU.P 1(R0), R3 |
| CMP R5, R3 |
| BNE loop_6 |
| MOVHU 3(R0), R3 |
| CMP R6, R3 |
| BNE loop_6 |
| B found |
| len_4_5: |
| TBZ $0, R3, len_4 |
| len_5: |
| // R5 and R7 contain 5-byte of sep |
| MOVWU (R2), R5 |
| MOVBU 4(R2), R7 |
| loop_5: |
| CMP R4, R0 |
| BHI not_found |
| MOVWU.P 1(R0), R3 |
| CMP R5, R3 |
| BNE loop_5 |
| MOVBU 3(R0), R3 |
| CMP R7, R3 |
| BNE loop_5 |
| B found |
| len_4: |
| // R5 contains 4-byte of sep |
| MOVWU (R2), R5 |
| loop_4: |
| CMP R4, R0 |
| BHI not_found |
| MOVWU.P 1(R0), R6 |
| CMP R5, R6 |
| BNE loop_4 |
| B found |
| len_2_3: |
| TBZ $0, R3, len_2 |
| len_3: |
| // R6 and R7 contain 3-byte of sep |
| MOVHU (R2), R6 |
| MOVBU 2(R2), R7 |
| loop_3: |
| CMP R4, R0 |
| BHI not_found |
| MOVHU.P 1(R0), R3 |
| CMP R6, R3 |
| BNE loop_3 |
| MOVBU 1(R0), R3 |
| CMP R7, R3 |
| BNE loop_3 |
| B found |
| len_2: |
| // R5 contains 2-byte of sep |
| MOVHU (R2), R5 |
| loop_2: |
| CMP R4, R0 |
| BHI not_found |
| MOVHU.P 1(R0), R6 |
| CMP R5, R6 |
| BNE loop_2 |
| found: |
| SUB R8, R0, R0 |
| MOVD R0, (R9) |
| RET |
| not_found: |
| MOVD $-1, R0 |
| MOVD R0, (R9) |
| RET |
| greater_8: |
| SUB $9, R3, R11 // len(sep) - 9, offset of R0 for last 8 bytes |
| CMP $16, R3 |
| BHI greater_16 |
| len_9_16: |
| MOVD.P 8(R2), R5 // R5 contains the first 8-byte of sep |
| SUB $16, R3, R7 // len(sep) - 16, offset of R2 for last 8 bytes |
| MOVD (R2)(R7), R6 // R6 contains the last 8-byte of sep |
| loop_9_16: |
| // search the first 8 bytes first |
| CMP R4, R0 |
| BHI not_found |
| MOVD.P 1(R0), R7 |
| CMP R5, R7 |
| BNE loop_9_16 |
| MOVD (R0)(R11), R7 |
| CMP R6, R7 // compare the last 8 bytes |
| BNE loop_9_16 |
| B found |
| greater_16: |
| CMP $24, R3 |
| BHI len_25_32 |
| len_17_24: |
| LDP.P 16(R2), (R5, R6) // R5 and R6 contain the first 16-byte of sep |
| SUB $24, R3, R10 // len(sep) - 24 |
| MOVD (R2)(R10), R7 // R7 contains the last 8-byte of sep |
| loop_17_24: |
| // search the first 16 bytes first |
| CMP R4, R0 |
| BHI not_found |
| MOVD.P 1(R0), R10 |
| CMP R5, R10 |
| BNE loop_17_24 |
| MOVD 7(R0), R10 |
| CMP R6, R10 |
| BNE loop_17_24 |
| MOVD (R0)(R11), R10 |
| CMP R7, R10 // compare the last 8 bytes |
| BNE loop_17_24 |
| B found |
| len_25_32: |
| LDP.P 16(R2), (R5, R6) |
| MOVD.P 8(R2), R7 // R5, R6 and R7 contain the first 24-byte of sep |
| SUB $32, R3, R12 // len(sep) - 32 |
| MOVD (R2)(R12), R10 // R10 contains the last 8-byte of sep |
| loop_25_32: |
| // search the first 24 bytes first |
| CMP R4, R0 |
| BHI not_found |
| MOVD.P 1(R0), R12 |
| CMP R5, R12 |
| BNE loop_25_32 |
| MOVD 7(R0), R12 |
| CMP R6, R12 |
| BNE loop_25_32 |
| MOVD 15(R0), R12 |
| CMP R7, R12 |
| BNE loop_25_32 |
| MOVD (R0)(R11), R12 |
| CMP R10, R12 // compare the last 8 bytes |
| BNE loop_25_32 |
| B found |