internal/bytealg: port bytealg functions to reg ABI on riscv64

This CL adds support for the reg ABI to the bytes functions for
riscv64. These are initially under control of the
GOEXPERIMENT macro until all changes are in.

Change-Id: I026295ae38e2aba055f7a51c77f92c1921e5ec97
Reviewed-on: https://go-review.googlesource.com/c/go/+/361916
Run-TryBot: mzh <mzh@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
This commit is contained in:
Meng Zhuo 2022-03-30 18:19:48 +08:00 committed by mzh
parent bf19163a54
commit be1d7388b3
4 changed files with 315 additions and 238 deletions

View File

@ -5,161 +5,179 @@
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOV a_base+0(FP), X5
MOV a_len+8(FP), X6
MOV b_base+24(FP), X7
MOV b_len+32(FP), X8
MOV $ret+48(FP), X9
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
#ifndef GOEXPERIMENT_regabiargs
MOV a_base+0(FP), X10
MOV a_len+8(FP), X11
MOV b_base+24(FP), X12
MOV b_len+32(FP), X13
MOV $ret+48(FP), X14
#else
// X10 = a_base
// X11 = a_len
// X12 = a_cap (unused)
// X13 = b_base (want in X12)
// X14 = b_len (want in X13)
// X15 = b_cap (unused)
MOV X13, X12
MOV X14, X13
#endif
JMP compare<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOV a_base+0(FP), X5
MOV a_len+8(FP), X6
MOV b_base+16(FP), X7
MOV b_len+24(FP), X8
MOV $ret+32(FP), X9
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
#ifndef GOEXPERIMENT_regabiargs
MOV a_base+0(FP), X10
MOV a_len+8(FP), X11
MOV b_base+16(FP), X12
MOV b_len+24(FP), X13
MOV $ret+32(FP), X14
#endif
// X10 = a_base
// X11 = a_len
// X12 = b_base
// X13 = b_len
JMP compare<>(SB)
// On entry:
// X5 points to start of a
// X6 length of a
// X7 points to start of b
// X8 length of b
// X9 points to the address to store the return value (-1/0/1)
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi X14 points to the address to store the return value (-1/0/1)
// for regabi the return value in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
BEQ X5, X7, cmp_len
BEQ X10, X12, cmp_len
MOV X6, X10
BGE X8, X10, use_a_len // X10 = min(len(a), len(b))
MOV X8, X10
MOV X11, X5
BGE X13, X5, use_a_len // X5 = min(len(a), len(b))
MOV X13, X5
use_a_len:
BEQZ X10, cmp_len
BEQZ X5, cmp_len
MOV $32, X11
BLT X10, X11, loop4_check
MOV $32, X6
BLT X5, X6, loop4_check
// Check alignment - if alignment differs we have to do one byte at a time.
AND $3, X5, X12
AND $3, X7, X13
BNE X12, X13, loop4_check
BEQZ X12, loop32_check
AND $3, X10, X7
AND $3, X12, X8
BNE X7, X8, loop4_check
BEQZ X7, loop32_check
// Check one byte at a time until we reach 8 byte alignment.
SUB X12, X10, X10
SUB X7, X5, X5
align:
ADD $-1, X12
MOVBU 0(X5), X13
MOVBU 0(X7), X14
BNE X13, X14, cmp
ADD $1, X5
ADD $1, X7
BNEZ X12, align
ADD $-1, X7
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
BNEZ X7, align
loop32_check:
MOV $32, X12
BLT X10, X12, loop16_check
MOV $32, X7
BLT X5, X7, loop16_check
loop32:
MOV 0(X5), X15
MOV 0(X7), X16
MOV 8(X5), X17
MOV 8(X7), X18
MOV 0(X10), X15
MOV 0(X12), X16
MOV 8(X10), X17
MOV 8(X12), X18
BEQ X15, X16, loop32a
JMP cmp8a
loop32a:
BEQ X17, X18, loop32b
JMP cmp8b
loop32b:
MOV 16(X5), X15
MOV 16(X7), X16
MOV 24(X5), X17
MOV 24(X7), X18
MOV 16(X10), X15
MOV 16(X12), X16
MOV 24(X10), X17
MOV 24(X12), X18
BEQ X15, X16, loop32c
JMP cmp8a
loop32c:
BEQ X17, X18, loop32d
JMP cmp8b
loop32d:
ADD $32, X5
ADD $32, X7
ADD $-32, X10
BGE X10, X12, loop32
BEQZ X10, cmp_len
ADD $32, X10
ADD $32, X12
ADD $-32, X5
BGE X5, X7, loop32
BEQZ X5, cmp_len
loop16_check:
MOV $16, X11
BLT X10, X11, loop4_check
MOV $16, X6
BLT X5, X6, loop4_check
loop16:
MOV 0(X5), X15
MOV 0(X7), X16
MOV 8(X5), X17
MOV 8(X7), X18
MOV 0(X10), X15
MOV 0(X12), X16
MOV 8(X10), X17
MOV 8(X12), X18
BEQ X15, X16, loop16a
JMP cmp8a
loop16a:
BEQ X17, X18, loop16b
JMP cmp8b
loop16b:
ADD $16, X5
ADD $16, X7
ADD $-16, X10
BGE X10, X11, loop16
BEQZ X10, cmp_len
ADD $16, X10
ADD $16, X12
ADD $-16, X5
BGE X5, X6, loop16
BEQZ X5, cmp_len
loop4_check:
MOV $4, X11
BLT X10, X11, loop1
MOV $4, X6
BLT X5, X6, loop1
loop4:
MOVBU 0(X5), X13
MOVBU 0(X7), X14
MOVBU 1(X5), X15
MOVBU 1(X7), X16
BEQ X13, X14, loop4a
SLTU X14, X13, X10
SLTU X13, X14, X11
MOVBU 0(X10), X8
MOVBU 0(X12), X9
MOVBU 1(X10), X15
MOVBU 1(X12), X16
BEQ X8, X9, loop4a
SLTU X9, X8, X5
SLTU X8, X9, X6
JMP cmp_ret
loop4a:
BEQ X15, X16, loop4b
SLTU X16, X15, X10
SLTU X15, X16, X11
SLTU X16, X15, X5
SLTU X15, X16, X6
JMP cmp_ret
loop4b:
MOVBU 2(X5), X21
MOVBU 2(X7), X22
MOVBU 3(X5), X23
MOVBU 3(X7), X24
MOVBU 2(X10), X21
MOVBU 2(X12), X22
MOVBU 3(X10), X23
MOVBU 3(X12), X24
BEQ X21, X22, loop4c
SLTU X22, X21, X10
SLTU X21, X22, X11
SLTU X22, X21, X5
SLTU X21, X22, X6
JMP cmp_ret
loop4c:
BEQ X23, X24, loop4d
SLTU X24, X23, X10
SLTU X23, X24, X11
SLTU X24, X23, X5
SLTU X23, X24, X6
JMP cmp_ret
loop4d:
ADD $4, X5
ADD $4, X7
ADD $-4, X10
BGE X10, X11, loop4
ADD $4, X10
ADD $4, X12
ADD $-4, X5
BGE X5, X6, loop4
loop1:
BEQZ X10, cmp_len
MOVBU 0(X5), X13
MOVBU 0(X7), X14
BNE X13, X14, cmp
ADD $1, X5
ADD $1, X7
ADD $-1, X10
BEQZ X5, cmp_len
MOVBU 0(X10), X8
MOVBU 0(X12), X9
BNE X8, X9, cmp
ADD $1, X10
ADD $1, X12
ADD $-1, X5
JMP loop1
// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
MOV $0xff, X19
cmp8a_loop:
AND X15, X19, X13
AND X16, X19, X14
BNE X13, X14, cmp
AND X15, X19, X8
AND X16, X19, X9
BNE X8, X9, cmp
SLLI $8, X19
JMP cmp8a_loop
@ -167,19 +185,21 @@ cmp8a_loop:
cmp8b:
MOV $0xff, X19
cmp8b_loop:
AND X17, X19, X13
AND X18, X19, X14
BNE X13, X14, cmp
AND X17, X19, X8
AND X18, X19, X9
BNE X8, X9, cmp
SLLI $8, X19
JMP cmp8b_loop
cmp_len:
MOV X6, X13
MOV X8, X14
MOV X11, X8
MOV X13, X9
cmp:
SLTU X14, X13, X10
SLTU X13, X14, X11
SLTU X9, X8, X5
SLTU X8, X9, X6
cmp_ret:
SUB X10, X11, X12
MOV X12, (X9)
SUB X5, X6, X10
#ifndef GOEXPERIMENT_regabiargs
MOV X10, (X14)
#endif
RET

View File

@ -5,40 +5,61 @@
#include "go_asm.h"
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-40
MOV b_base+0(FP), A1
MOV b_len+8(FP), A2
MOVBU c+24(FP), A3 // byte to count
MOV ZERO, A4 // count
ADD A1, A2 // end
TEXT ·Count<ABIInternal>(SB),NOSPLIT,$0-40
#ifndef GOEXPERIMENT_regabiargs
MOV b_base+0(FP), X10
MOV b_len+8(FP), X11
MOVBU c+24(FP), X12 // byte to count
#else
// X10 = b_base
// X11 = b_len
// X12 = b_cap (unused)
// X13 = byte to count (want in X12)
MOV X13, X12
#endif
MOV ZERO, X14 // count
ADD X10, X11 // end
loop:
BEQ A1, A2, done
MOVBU (A1), A5
ADD $1, A1
BNE A3, A5, loop
ADD $1, A4
BEQ X10, X11, done
MOVBU (X10), X15
ADD $1, X10
BNE X12, X15, loop
ADD $1, X14
JMP loop
done:
MOV A4, ret+32(FP)
#ifndef GOEXPERIMENT_regabiargs
MOV X14, ret+32(FP)
#else
MOV X14, X10
#endif
RET
TEXT ·CountString(SB),NOSPLIT,$0-32
MOV s_base+0(FP), A1
MOV s_len+8(FP), A2
MOVBU c+16(FP), A3 // byte to count
MOV ZERO, A4 // count
ADD A1, A2 // end
TEXT ·CountString<ABIInternal>(SB),NOSPLIT,$0-32
#ifndef GOEXPERIMENT_regabiargs
MOV s_base+0(FP), X10
MOV s_len+8(FP), X11
MOVBU c+16(FP), X12 // byte to count
#endif
// X10 = s_base
// X11 = s_len
// X12 = byte to count
MOV ZERO, X14 // count
ADD X10, X11 // end
loop:
BEQ A1, A2, done
MOVBU (A1), A5
ADD $1, A1
BNE A3, A5, loop
ADD $1, A4
BEQ X10, X11, done
MOVBU (X10), X15
ADD $1, X10
BNE X12, X15, loop
ADD $1, X14
JMP loop
done:
MOV A4, ret+24(FP)
#ifndef GOEXPERIMENT_regabiargs
MOV X14, ret+24(FP)
#else
MOV X14, X10
#endif
RET

View File

@ -8,120 +8,137 @@
#define CTXT S10
// func memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
MOV a+0(FP), X5
MOV b+8(FP), X6
MOV size+16(FP), X7
MOV $ret+24(FP), X19
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
#ifndef GOEXPERIMENT_regabiargs
MOV a+0(FP), X10
MOV b+8(FP), X11
MOV size+16(FP), X12
MOV $ret+24(FP), X13
#endif
// X10 = a_base
// X11 = b_base
// X12 = size
JMP memequal<>(SB)
// func memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
MOV a+0(FP), X5
MOV b+8(FP), X6
MOV 8(CTXT), X7 // compiler stores size at offset 8 in the closure
MOV $ret+16(FP), X19
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure
#ifndef GOEXPERIMENT_regabiargs
MOV a+0(FP), X10
MOV b+8(FP), X11
MOV $ret+16(FP), X13
#endif
// X10 = a_base
// X11 = b_base
JMP memequal<>(SB)
// On entry X5 and X6 contain pointers, X7 contains length.
// X19 contains address for return value.
// On entry X10 and X11 contain pointers, X12 contains length.
// For non-regabi X13 contains address for return value.
// For regabi return value in X10.
TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
BEQ X5, X6, eq
BEQ X10, X11, eq
MOV $32, X8
BLT X7, X8, loop4_check
MOV $32, X23
BLT X12, X23, loop4_check
// Check alignment - if alignment differs we have to do one byte at a time.
AND $3, X5, X9
AND $3, X6, X10
BNE X9, X10, loop4_check
AND $3, X10, X9
AND $3, X11, X19
BNE X9, X19, loop4_check
BEQZ X9, loop32_check
// Check one byte at a time until we reach 8 byte alignment.
SUB X9, X7, X7
SUB X9, X12, X12
align:
ADD $-1, X9
MOVBU 0(X5), X10
MOVBU 0(X6), X11
BNE X10, X11, not_eq
ADD $1, X5
ADD $1, X6
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
BNEZ X9, align
loop32_check:
MOV $32, X9
BLT X7, X9, loop16_check
BLT X12, X9, loop16_check
loop32:
MOV 0(X5), X10
MOV 0(X6), X11
MOV 8(X5), X12
MOV 8(X6), X13
BNE X10, X11, not_eq
BNE X12, X13, not_eq
MOV 16(X5), X14
MOV 16(X6), X15
MOV 24(X5), X16
MOV 24(X6), X17
MOV 0(X10), X19
MOV 0(X11), X20
MOV 8(X10), X21
MOV 8(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
MOV 16(X10), X14
MOV 16(X11), X15
MOV 24(X10), X16
MOV 24(X11), X17
BNE X14, X15, not_eq
BNE X16, X17, not_eq
ADD $32, X5
ADD $32, X6
ADD $-32, X7
BGE X7, X9, loop32
BEQZ X7, eq
ADD $32, X10
ADD $32, X11
ADD $-32, X12
BGE X12, X9, loop32
BEQZ X12, eq
loop16_check:
MOV $16, X8
BLT X7, X8, loop4_check
MOV $16, X23
BLT X12, X23, loop4_check
loop16:
MOV 0(X5), X10
MOV 0(X6), X11
MOV 8(X5), X12
MOV 8(X6), X13
BNE X10, X11, not_eq
BNE X12, X13, not_eq
ADD $16, X5
ADD $16, X6
ADD $-16, X7
BGE X7, X8, loop16
BEQZ X7, eq
MOV 0(X10), X19
MOV 0(X11), X20
MOV 8(X10), X21
MOV 8(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
ADD $16, X10
ADD $16, X11
ADD $-16, X12
BGE X12, X23, loop16
BEQZ X12, eq
loop4_check:
MOV $4, X8
BLT X7, X8, loop1
MOV $4, X23
BLT X12, X23, loop1
loop4:
MOVBU 0(X5), X10
MOVBU 0(X6), X11
MOVBU 1(X5), X12
MOVBU 1(X6), X13
BNE X10, X11, not_eq
BNE X12, X13, not_eq
MOVBU 2(X5), X14
MOVBU 2(X6), X15
MOVBU 3(X5), X16
MOVBU 3(X6), X17
MOVBU 0(X10), X19
MOVBU 0(X11), X20
MOVBU 1(X10), X21
MOVBU 1(X11), X22
BNE X19, X20, not_eq
BNE X21, X22, not_eq
MOVBU 2(X10), X14
MOVBU 2(X11), X15
MOVBU 3(X10), X16
MOVBU 3(X11), X17
BNE X14, X15, not_eq
BNE X16, X17, not_eq
ADD $4, X5
ADD $4, X6
ADD $-4, X7
BGE X7, X8, loop4
ADD $4, X10
ADD $4, X11
ADD $-4, X12
BGE X12, X23, loop4
loop1:
BEQZ X7, eq
MOVBU 0(X5), X10
MOVBU 0(X6), X11
BNE X10, X11, not_eq
ADD $1, X5
ADD $1, X6
ADD $-1, X7
BEQZ X12, eq
MOVBU 0(X10), X19
MOVBU 0(X11), X20
BNE X19, X20, not_eq
ADD $1, X10
ADD $1, X11
ADD $-1, X12
JMP loop1
not_eq:
MOV $0, X5
MOVB X5, (X19)
#ifndef GOEXPERIMENT_regabiargs
MOVB ZERO, (X13)
#else
MOVB ZERO, X10
#endif
RET
eq:
MOV $1, X5
MOVB X5, (X19)
#ifndef GOEXPERIMENT_regabiargs
MOV $1, X10
MOVB X10, (X13)
#else
MOV $1, X10
#endif
RET

View File

@ -5,48 +5,67 @@
#include "go_asm.h"
#include "textflag.h"
TEXT ·IndexByte(SB),NOSPLIT,$0-40
MOV b_base+0(FP), A1
MOV b_len+8(FP), A2
MOVBU c+24(FP), A3 // byte to find
MOV A1, A4 // store base for later
ADD A1, A2 // end
ADD $-1, A1
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
#ifndef GOEXPERIMENT_regabiargs
MOV b_base+0(FP), X10
MOV b_len+8(FP), X11
MOVBU c+24(FP), X13 // byte to find
#endif
// X10 = b_base
// X11 = b_len
// X12 = b_cap (unused)
// X13 = byte to find
MOV X10, X12 // store base for later
ADD X10, X11 // end
ADD $-1, X10
loop:
ADD $1, A1
BEQ A1, A2, notfound
MOVBU (A1), A5
BNE A3, A5, loop
ADD $1, X10
BEQ X10, X11, notfound
MOVBU (X10), X14
BNE X13, X14, loop
SUB A4, A1 // remove base
MOV A1, ret+32(FP)
SUB X12, X10 // remove base
#ifndef GOEXPERIMENT_regabiargs
MOV X10, ret+32(FP)
#endif
RET
notfound:
MOV $-1, A1
MOV A1, ret+32(FP)
MOV $-1, X10
#ifndef GOEXPERIMENT_regabiargs
MOV X10, ret+32(FP)
#endif
RET
TEXT ·IndexByteString(SB),NOSPLIT,$0-32
MOV s_base+0(FP), A1
MOV s_len+8(FP), A2
MOVBU c+16(FP), A3 // byte to find
MOV A1, A4 // store base for later
ADD A1, A2 // end
ADD $-1, A1
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
#ifndef GOEXPERIMENT_regabiargs
MOV s_base+0(FP), X10
MOV s_len+8(FP), X11
MOVBU c+16(FP), X12 // byte to find
#endif
// X10 = b_base
// X11 = b_len
// X12 = byte to find
MOV X10, X13 // store base for later
ADD X10, X11 // end
ADD $-1, X10
loop:
ADD $1, A1
BEQ A1, A2, notfound
MOVBU (A1), A5
BNE A3, A5, loop
ADD $1, X10
BEQ X10, X11, notfound
MOVBU (X10), X14
BNE X12, X14, loop
SUB A4, A1 // remove base
MOV A1, ret+24(FP)
SUB X13, X10 // remove base
#ifndef GOEXPERIMENT_regabiargs
MOV X10, ret+24(FP)
#endif
RET
notfound:
MOV $-1, A1
MOV A1, ret+24(FP)
MOV $-1, X10
#ifndef GOEXPERIMENT_regabiargs
MOV X10, ret+24(FP)
#endif
RET