diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s
index 22cb4fa97d..959a996f81 100644
--- a/src/internal/bytealg/equal_riscv64.s
+++ b/src/internal/bytealg/equal_riscv64.s
@@ -9,41 +9,119 @@
 
 // func memequal(a, b unsafe.Pointer, size uintptr) bool
 TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
-	MOV	a+0(FP), A1
-	MOV	b+8(FP), A2
-	BEQ	A1, A2, eq
-	MOV	size+16(FP), A3
-	ADD	A1, A3, A4
-loop:
-	BEQ	A1, A4, eq
-
-	MOVBU	(A1), A6
-	ADD	$1, A1
-	MOVBU	(A2), A7
-	ADD	$1, A2
-	BEQ	A6, A7, loop
-
-	MOVB	ZERO, ret+24(FP)
-	RET
-eq:
-	MOV	$1, A1
-	MOVB	A1, ret+24(FP)
-	RET
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	MOV	size+16(FP), X7
+	MOV	$ret+24(FP), X19
+	JMP	memequal<>(SB)
 
 // func memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
-	MOV	a+0(FP), A1
-	MOV	b+8(FP), A2
-	BEQ	A1, A2, eq
-	MOV	8(CTXT), A3	// compiler stores size at offset 8 in the closure
-	MOV	A1, 8(X2)
-	MOV	A2, 16(X2)
-	MOV	A3, 24(X2)
-	CALL	runtime·memequal(SB)
-	MOVBU	32(X2), A1
-	MOVB	A1, ret+16(FP)
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	MOV	8(CTXT), X7	// compiler stores size at offset 8 in the closure
+	MOV	$ret+16(FP), X19
+	JMP	memequal<>(SB)
+
+// On entry X5 and X6 contain pointers, X7 contains length.
+// X19 contains address for return value.
+TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
+	BEQ	X5, X6, eq
+
+	MOV	$32, X8
+	BLT	X7, X8, loop4_check
+
+	// Check alignment - if alignment differs we have to do one byte at a time.
+	AND	$3, X5, X9
+	AND	$3, X6, X10
+	BNE	X9, X10, loop4_check
+	BEQZ	X9, loop32_check
+
+	// Check one byte at a time until we reach 8 byte alignment.
+	SUB	X9, X7, X7
+align:
+	ADD	$-1, X9
+	MOVBU	0(X5), X10
+	MOVBU	0(X6), X11
+	BNE	X10, X11, not_eq
+	ADD	$1, X5
+	ADD	$1, X6
+	BNEZ	X9, align
+
+loop32_check:
+	MOV	$32, X9
+	BLT	X7, X9, loop16_check
+loop32:
+	MOV	0(X5), X10
+	MOV	0(X6), X11
+	MOV	8(X5), X12
+	MOV	8(X6), X13
+	BNE	X10, X11, not_eq
+	BNE	X12, X13, not_eq
+	MOV	16(X5), X14
+	MOV	16(X6), X15
+	MOV	24(X5), X16
+	MOV	24(X6), X17
+	BNE	X14, X15, not_eq
+	BNE	X16, X17, not_eq
+	ADD	$32, X5
+	ADD	$32, X6
+	ADD	$-32, X7
+	BGE	X7, X9, loop32
+	BEQZ	X7, eq
+
+loop16_check:
+	MOV	$16, X8
+	BLT	X7, X8, loop4_check
+loop16:
+	MOV	0(X5), X10
+	MOV	0(X6), X11
+	MOV	8(X5), X12
+	MOV	8(X6), X13
+	BNE	X10, X11, not_eq
+	BNE	X12, X13, not_eq
+	ADD	$16, X5
+	ADD	$16, X6
+	ADD	$-16, X7
+	BGE	X7, X8, loop16
+	BEQZ	X7, eq
+
+loop4_check:
+	MOV	$4, X8
+	BLT	X7, X8, loop1
+loop4:
+	MOVBU	0(X5), X10
+	MOVBU	0(X6), X11
+	MOVBU	1(X5), X12
+	MOVBU	1(X6), X13
+	BNE	X10, X11, not_eq
+	BNE	X12, X13, not_eq
+	MOVBU	2(X5), X14
+	MOVBU	2(X6), X15
+	MOVBU	3(X5), X16
+	MOVBU	3(X6), X17
+	BNE	X14, X15, not_eq
+	BNE	X16, X17, not_eq
+	ADD	$4, X5
+	ADD	$4, X6
+	ADD	$-4, X7
+	BGE	X7, X8, loop4
+
+loop1:
+	BEQZ	X7, eq
+	MOVBU	0(X5), X10
+	MOVBU	0(X6), X11
+	BNE	X10, X11, not_eq
+	ADD	$1, X5
+	ADD	$1, X6
+	ADD	$-1, X7
+	JMP	loop1
+
+not_eq:
+	MOV	$0, X5
+	MOVB	X5, (X19)
 	RET
 eq:
-	MOV	$1, A1
-	MOVB	A1, ret+16(FP)
+	MOV	$1, X5
+	MOVB	X5, (X19)
 	RET
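
For readers less familiar with RISC-V assembly, here is a minimal pure-Go sketch of the tiered comparison strategy the new memequal<> body implements: a 32-byte loop of four 8-byte word compares, a 16-byte stride, a 4-byte stride, then a byte-at-a-time tail. The function and variable names are illustrative, not part of the patch, and the alignment prologue is omitted because safe Go code reads through the slice rather than issuing raw loads.

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// memequalSketch mirrors the control flow of memequal<> above
// (loop32 -> loop16 -> loop4 -> loop1), purely for illustration.
func memequalSketch(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}
	// loop32: compare four 8-byte words per iteration.
	for len(a) >= 32 {
		for i := 0; i < 32; i += 8 {
			if binary.LittleEndian.Uint64(a[i:]) != binary.LittleEndian.Uint64(b[i:]) {
				return false
			}
		}
		a, b = a[32:], b[32:]
	}
	// loop16: two 8-byte words (runs at most once, since len < 32 here).
	for len(a) >= 16 {
		if binary.LittleEndian.Uint64(a) != binary.LittleEndian.Uint64(b) ||
			binary.LittleEndian.Uint64(a[8:]) != binary.LittleEndian.Uint64(b[8:]) {
			return false
		}
		a, b = a[16:], b[16:]
	}
	// loop4: four bytes per iteration.
	for len(a) >= 4 {
		if a[0] != b[0] || a[1] != b[1] || a[2] != b[2] || a[3] != b[3] {
			return false
		}
		a, b = a[4:], b[4:]
	}
	// loop1: remaining tail, one byte at a time.
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

func main() {
	x := bytes.Repeat([]byte("abcdefgh"), 9)[:71] // 2*32 + 4 + 3 bytes: hits every tier
	y := append([]byte(nil), x...)
	fmt.Println(memequalSketch(x, y)) // true
	y[70] ^= 1                        // flip a bit in the final tail byte
	fmt.Println(memequalSketch(x, y)) // false
}
```

The payoff of the real assembly over the old byte-at-a-time loop is the same as in this sketch: for buffers of 32 bytes or more, each iteration compares 32 bytes with four pairs of 8-byte loads instead of 32 single-byte round trips, and the smaller strides keep the tail cheap.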