// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// Index is the entry point for byte-slice operands.
//
// The body below takes its operands from registers and returns its result
// in R4, so the symbol is declared with the register-based internal ABI.
// On entry (slice headers are passed as ptr, len, cap):
//   R4 = haystack pointer
//   R5 = haystack length
//   R6 = haystack capacity (unused, overwritten below)
//   R7 = separator pointer
//   R8 = separator length
// The separator is moved into R6/R7 so that both entry points reach
// indexbody with the same register layout.
TEXT ·Index<ABIInternal>(SB),NOSPLIT,$0-56
	MOVV	R7, R6		// R6 = separator pointer
	MOVV	R8, R7		// R7 = separator length
	JMP	indexbody<>(SB)

// IndexString is the entry point for string operands.
//
// String headers are (ptr, len) pairs, so the operands already sit in the
// registers indexbody expects (R4, R5, R6, R7) and no shuffling is needed.
TEXT ·IndexString<ABIInternal>(SB),NOSPLIT,$0-40
	JMP	indexbody<>(SB)

// indexbody searches for the first occurrence of the separator.
//
// input:
//   R4 = string
//   R5 = length
//   R6 = separator pointer
//   R7 = separator length (2 <= len <= 64)
// output:
//   R4 = byte index of the first match, or -1 if there is none
//
// Register roles inside the body:
//   R4       = cursor; every loop advances it by one right after loading
//              the first chunk, so later loads in the same iteration use
//              displacements that are one smaller than the chunk offset
//   R5       = scratch for length classification
//   R7       = for lengths > 8: (separator length - tail chunk size - 1),
//              the index used to load the tail chunk relative to the
//              already advanced cursor
//   R8       = address of the last position where a match can start
//   R9       = original string pointer + 1, so that (cursor - R9) at
//              'found' is the index of the matching position
//   R10..R17 = separator chunks / string chunks (scalar paths)
//   V0..V7   = separator chunks / string chunks (LSX paths)
//   X0..X5   = separator chunks / string chunks (LASX path)
//
// For separator lengths that are not an exact multiple of the chunk size,
// the final chunk is loaded from (end - chunk size) and therefore overlaps
// the previous one; no load reaches past the end of the candidate window.
TEXT indexbody<>(SB),NOSPLIT,$0
	// The main idea is to load 'sep' into separate register(s) once,
	// to avoid reloading it again and again for subsequent
	// substring comparisons.
	SUBV	R7, R5, R8
	ADDV	R4, R8		// R8 contains the start of last substring for comparison
	ADDV	$1, R4, R9	// store base for later

	// Dispatch on the separator length.
	MOVV	$8, R5
	BGE	R7, R5, len_gt_or_eq_8
len_2_7:
	// Lengths 2..7 are told apart by the low three bits of the length.
	AND	$0x4, R7, R5
	BNE	R5, len_4_7

len_2_3:
	AND	$0x1, R7, R5
	BNE	R5, len_3

len_2:
	MOVHU	(R6), R10
loop_2:
	BLT	R8, R4, not_found	// cursor moved past the last candidate
	MOVHU	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, loop_2
	JMP	found

len_3:
	MOVHU	(R6), R10	// sep[0:2]
	MOVBU	2(R6), R11	// sep[2]
loop_3:
	BLT	R8, R4, not_found
	MOVHU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, loop_3
	MOVBU	1(R4), R13	// byte 2 of the candidate (cursor already advanced)
	BNE	R11, R13, loop_3
	JMP	found

len_4_7:
	AND	$0x2, R7, R5
	BNE	R5, len_6_7
	AND	$0x1, R7, R5
	BNE	R5, len_5
len_4:
	MOVWU	(R6), R10
loop_4:
	BLT	R8, R4, not_found
	MOVWU	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, loop_4
	JMP	found

len_5:
	MOVWU	(R6), R10	// sep[0:4]
	MOVBU	4(R6), R11	// sep[4]
loop_5:
	BLT	R8, R4, not_found
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, loop_5
	MOVBU	3(R4), R13	// byte 4 of the candidate
	BNE	R11, R13, loop_5
	JMP	found

len_6_7:
	AND	$0x1, R7, R5
	BNE	R5, len_7
len_6:
	MOVWU	(R6), R10	// sep[0:4]
	MOVHU	4(R6), R11	// sep[4:6]
loop_6:
	BLT	R8, R4, not_found
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, loop_6
	MOVHU	3(R4), R13	// bytes 4..5 of the candidate
	BNE	R11, R13, loop_6
	JMP	found

len_7:
	MOVWU	(R6), R10	// sep[0:4]
	MOVWU	3(R6), R11	// sep[3:7], overlaps the first word by one byte
loop_7:
	BLT	R8, R4, not_found
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, loop_7
	MOVWU	2(R4), R13	// bytes 3..6 of the candidate
	BNE	R11, R13, loop_7
	JMP	found

len_gt_or_eq_8:
	BEQ	R5, R7, len_8
	MOVV	$17, R5
	BGE	R7, R5, len_gt_or_eq_17
	JMP	len_9_16
len_8:
	MOVV	(R6), R10
loop_8:
	BLT	R8, R4, not_found
	MOVV	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, loop_8
	JMP	found

len_9_16:
	MOVV	(R6), R10	// sep[0:8]
	SUBV	$8, R7
	MOVV	(R6)(R7), R11	// sep[len-8:len]
	SUBV	$1, R7		// R7 = len-9: tail index relative to the advanced cursor
loop_9_16:
	BLT	R8, R4, not_found
	MOVV	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, loop_9_16
	MOVV	(R4)(R7), R13	// last 8 bytes of the candidate
	BNE	R11, R13, loop_9_16
	JMP	found

len_gt_or_eq_17:
	MOVV	$25, R5
	BGE	R7, R5, len_gt_or_eq_25
len_17_24:
	MOVV	0(R6), R10	// sep[0:8]
	MOVV	8(R6), R11	// sep[8:16]
	SUBV	$8, R7
	MOVV	(R6)(R7), R12	// sep[len-8:len]
	SUBV	$1, R7		// R7 = len-9
loop_17_24:
	BLT	R8, R4, not_found
	MOVV	(R4), R13
	ADDV	$1, R4
	BNE	R10, R13, loop_17_24
	MOVV	7(R4), R14	// bytes 8..15 of the candidate
	BNE	R11, R14, loop_17_24
	MOVV	(R4)(R7), R15	// last 8 bytes of the candidate
	BNE	R12, R15, loop_17_24
	JMP	found

len_gt_or_eq_25:
	MOVV	$33, R5
	BGE	R7, R5, len_gt_or_eq_33
	// Lengths 25..32: use the 128-bit vector path when LSX is available,
	// otherwise fall through to the scalar path.
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R10
	BNE	R10, lsx_len_25_32
len_25_32:
	MOVV	0(R6), R10	// sep[0:8]
	MOVV	8(R6), R11	// sep[8:16]
	MOVV	16(R6), R12	// sep[16:24]
	SUBV	$8, R7
	MOVV	(R6)(R7), R13	// sep[len-8:len]
	SUBV	$1, R7		// R7 = len-9
loop_25_32:
	BLT	R8, R4, not_found
	MOVV	(R4), R14
	ADDV	$1, R4
	BNE	R10, R14, loop_25_32
	MOVV	7(R4), R15	// bytes 8..15 of the candidate
	BNE	R11, R15, loop_25_32
	MOVV	15(R4), R16	// bytes 16..23 of the candidate
	BNE	R12, R16, loop_25_32
	MOVV	(R4)(R7), R17	// last 8 bytes of the candidate
	BNE	R13, R17, loop_25_32
	JMP	found

	// On loong64, LSX is included if LASX is supported.
lasx_len_25_32:
lsx_len_25_32:
	VMOVQ	0(R6), V0	// sep[0:16]
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V1	// sep[len-16:len]
	SUBV	$1, R7		// R7 = len-17
lsx_loop_25_32:
	BLT	R8, R4, not_found
	VMOVQ	(R4), V2
	ADDV	$1, R4
	// VSEQV yields an all-ones 64-bit lane where the inputs are equal and
	// a zero lane where they differ; VSETANYEQV sets the condition flag
	// if any lane is zero, i.e. if the chunk did not match.
	VSEQV	V0, V2, V2
	VSETANYEQV	V2, FCC0
	BFPT	FCC0, lsx_loop_25_32

	VMOVQ	(R4)(R7), V3	// last 16 bytes of the candidate
	VSEQV	V1, V3, V3
	VSETANYEQV	V3, FCC1
	BFPT	FCC1, lsx_loop_25_32
	JMP	found

len_gt_or_eq_33:
	// Lengths 33..64 are handled by vector code only: LASX when it is
	// available, LSX otherwise. No scalar fallback exists here, so
	// NOTE(review): callers are presumably expected to pass such lengths
	// only when at least LSX is present - confirm against the Go side.
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R10
	MOVV	$49, R5
	BGE	R7, R5, len_gt_or_eq_49
len_33_48:
	BNE	R10, lasx_len_33_48
	JMP	lsx_len_33_48

len_gt_or_eq_49:
len_49_64:
	BNE	R10, lasx_len_49_64
	JMP	lsx_len_49_64

lsx_len_33_48:
	VMOVQ	0(R6), V0	// sep[0:16]
	VMOVQ	16(R6), V1	// sep[16:32]
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V2	// sep[len-16:len]
	SUBV	$1, R7		// R7 = len-17
lsx_loop_33_48:
	BLT	R8, R4, not_found
	VMOVQ	0(R4), V3
	ADDV	$1, R4
	VSEQV	V0, V3, V3
	VSETANYEQV	V3, FCC0
	BFPT	FCC0, lsx_loop_33_48

	VMOVQ	15(R4), V4	// bytes 16..31 of the candidate
	VSEQV	V1, V4, V4
	VSETANYEQV	V4, FCC1
	BFPT	FCC1, lsx_loop_33_48

	VMOVQ	(R4)(R7), V5	// last 16 bytes of the candidate
	VSEQV	V2, V5, V5
	VSETANYEQV	V5, FCC2
	BFPT	FCC2, lsx_loop_33_48
	JMP	found

lsx_len_49_64:
	VMOVQ	0(R6), V0	// sep[0:16]
	VMOVQ	16(R6), V1	// sep[16:32]
	VMOVQ	32(R6), V2	// sep[32:48]
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V3	// sep[len-16:len]
	SUBV	$1, R7		// R7 = len-17
lsx_loop_49_64:
	BLT	R8, R4, not_found
	VMOVQ	0(R4), V4
	ADDV	$1, R4
	VSEQV	V0, V4, V4
	VSETANYEQV	V4, FCC0
	BFPT	FCC0, lsx_loop_49_64

	VMOVQ	15(R4), V5	// bytes 16..31 of the candidate
	VSEQV	V1, V5, V5
	VSETANYEQV	V5, FCC1
	BFPT	FCC1, lsx_loop_49_64

	VMOVQ	31(R4), V6	// bytes 32..47 of the candidate
	VSEQV	V2, V6, V6
	VSETANYEQV	V6, FCC2
	BFPT	FCC2, lsx_loop_49_64

	VMOVQ	(R4)(R7), V7	// last 16 bytes of the candidate
	VSEQV	V3, V7, V7
	VSETANYEQV	V7, FCC3
	BFPT	FCC3, lsx_loop_49_64
	JMP	found

	// With 256-bit vectors two (overlapping) loads cover every length
	// from 33 to 64, so both length classes share one loop.
lasx_len_33_48:
lasx_len_49_64:
lasx_len_33_64:
	XVMOVQ	(R6), X0	// sep[0:32]
	SUBV	$32, R7
	XVMOVQ	(R6)(R7), X1	// sep[len-32:len]
	SUBV	$1, R7		// R7 = len-33
lasx_loop_33_64:
	BLT	R8, R4, not_found
	XVMOVQ	(R4), X2
	ADDV	$1, R4
	XVSEQV	X0, X2, X3
	XVSETANYEQV	X3, FCC0
	BFPT	FCC0, lasx_loop_33_64

	XVMOVQ	(R4)(R7), X4	// last 32 bytes of the candidate
	XVSEQV	X1, X4, X5
	XVSETANYEQV	X5, FCC1
	BFPT	FCC1, lasx_loop_33_64
	JMP	found

found:
	// R4 is one past the matching position and R9 is base+1,
	// so the difference is the match index.
	SUBV	R9, R4
	RET

not_found:
	MOVV	$-1, R4
	RET