mirror of https://github.com/golang/go.git
internal/bytealg: improve mips64x equal on large size
name old time/op new time/op delta Equal/0 9.94ns ± 4% 9.12ns ± 5% -8.26% (p=0.000 n=10+10) Equal/1 24.5ns ± 0% 27.2ns ± 1% +11.22% (p=0.000 n=9+10) Equal/6 28.1ns ± 0% 32.1ns ± 1% +14.20% (p=0.000 n=8+10) Equal/9 37.1ns ± 0% 37.8ns ± 1% +1.95% (p=0.000 n=8+9) Equal/15 47.3ns ± 0% 44.3ns ± 0% -6.34% (p=0.000 n=9+10) Equal/16 42.9ns ± 0% 24.6ns ± 0% -42.66% (p=0.000 n=10+7) Equal/20 44.3ns ± 0% 57.4ns ± 0% +29.57% (p=0.000 n=9+10) Equal/32 63.2ns ± 0% 35.8ns ± 0% -43.35% (p=0.000 n=10+10) Equal/4K 6.49µs ± 0% 0.50µs ± 0% -92.27% (p=0.000 n=10+8) Equal/4M 6.70ms ± 0% 0.48ms ± 0% -92.78% (p=0.000 n=8+10) Equal/64M 110ms ± 0% 8ms ± 0% -92.65% (p=0.000 n=9+9) CompareBytesEqual 36.6ns ± 0% 35.9ns ± 0% -1.83% (p=0.000 n=10+9) name old speed new speed delta Equal/1 40.8MB/s ± 0% 36.7MB/s ± 0% -10.16% (p=0.000 n=10+10) Equal/6 213MB/s ± 0% 187MB/s ± 1% -12.32% (p=0.000 n=10+10) Equal/9 243MB/s ± 0% 238MB/s ± 1% -1.94% (p=0.000 n=9+10) Equal/15 317MB/s ± 0% 339MB/s ± 0% +6.86% (p=0.000 n=9+9) Equal/16 373MB/s ± 0% 651MB/s ± 0% +74.70% (p=0.000 n=8+10) Equal/20 452MB/s ± 0% 348MB/s ± 0% -22.90% (p=0.000 n=8+10) Equal/32 506MB/s ± 0% 893MB/s ± 0% +76.53% (p=0.000 n=10+9) Equal/4K 631MB/s ± 0% 8166MB/s ± 0% +1194.73% (p=0.000 n=10+10) Equal/4M 626MB/s ± 0% 8673MB/s ± 0% +1284.94% (p=0.000 n=8+10) Equal/64M 608MB/s ± 0% 8277MB/s ± 0% +1260.83% (p=0.000 n=9+9) Change-Id: I1cd14ade16390a5097a8d4e9721d5e822fa6218f Reviewed-on: https://go-review.googlesource.com/c/go/+/199597 Run-TryBot: Meng Zhuo <mzh@golangcn.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Trust: Meng Zhuo <mzh@golangcn.org>
This commit is contained in:
parent
646531c52a
commit
4f597abe77
|
|
@ -16,18 +16,82 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
|
|||
BEQ R1, R2, eq
|
||||
MOVV size+16(FP), R3
|
||||
ADDV R1, R3, R4
|
||||
loop:
|
||||
BNE R1, R4, test
|
||||
|
||||
// chunk size is 16
|
||||
SGTU $16, R3, R8
|
||||
BEQ R0, R8, chunk_entry
|
||||
|
||||
byte_loop:
|
||||
BNE R1, R4, byte_test
|
||||
MOVV $1, R1
|
||||
MOVB R1, ret+24(FP)
|
||||
RET
|
||||
test:
|
||||
byte_test:
|
||||
MOVBU (R1), R6
|
||||
ADDV $1, R1
|
||||
MOVBU (R2), R7
|
||||
ADDV $1, R2
|
||||
BEQ R6, R7, loop
|
||||
BEQ R6, R7, byte_loop
|
||||
JMP not_eq
|
||||
|
||||
chunk_entry:
|
||||
// make sure both a and b are aligned
|
||||
OR R1, R2, R9
|
||||
AND $0x7, R9
|
||||
BNE R0, R9, byte_loop
|
||||
JMP chunk_loop_1
|
||||
|
||||
chunk_loop:
|
||||
// chunk size is 16
|
||||
SGTU $16, R3, R8
|
||||
BNE R0, R8, chunk_tail_8
|
||||
chunk_loop_1:
|
||||
MOVV (R1), R6
|
||||
MOVV (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
MOVV 8(R1), R12
|
||||
MOVV 8(R2), R13
|
||||
ADDV $16, R1
|
||||
ADDV $16, R2
|
||||
SUBV $16, R3
|
||||
BEQ R12, R13, chunk_loop
|
||||
JMP not_eq
|
||||
|
||||
chunk_tail_8:
|
||||
AND $8, R3, R14
|
||||
BEQ R0, R14, chunk_tail_4
|
||||
MOVV (R1), R6
|
||||
MOVV (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $8, R1
|
||||
ADDV $8, R2
|
||||
|
||||
chunk_tail_4:
|
||||
AND $4, R3, R14
|
||||
BEQ R0, R14, chunk_tail_2
|
||||
MOVWU (R1), R6
|
||||
MOVWU (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $4, R1
|
||||
ADDV $4, R2
|
||||
|
||||
chunk_tail_2:
|
||||
AND $2, R3, R14
|
||||
BEQ R0, R14, chunk_tail_1
|
||||
MOVHU (R1), R6
|
||||
MOVHU (R2), R7
|
||||
BNE R6, R7, not_eq
|
||||
ADDV $2, R1
|
||||
ADDV $2, R2
|
||||
|
||||
chunk_tail_1:
|
||||
AND $1, R3, R14
|
||||
BEQ R0, R14, eq
|
||||
MOVBU (R1), R6
|
||||
MOVBU (R2), R7
|
||||
BEQ R6, R7, eq
|
||||
|
||||
not_eq:
|
||||
MOVB R0, ret+24(FP)
|
||||
RET
|
||||
eq:
|
||||
|
|
|
|||
Loading…
Reference in New Issue