mirror of https://github.com/golang/go.git
internal/bytealg: optimize Index/IndexString in amd64
Now with PCALIGN available on amd64 we can start optimizing some routines that benefit from instruction alignment.
```
│ sec/op │ sec/op vs base │
IndexByte/4K-16 69.89n ± ∞ ¹ 45.88n ± ∞ ¹ -34.35% (p=0.008 n=5)
IndexByte/4M-16 65.36µ ± ∞ ¹ 47.32µ ± ∞ ¹ -27.60% (p=0.008 n=5)
IndexByte/64M-16 1.435m ± ∞ ¹ 1.140m ± ∞ ¹ -20.57% (p=0.008 n=5)
│ B/s │ B/s vs base │
IndexByte/4K-16 54.58Gi ± ∞ ¹ 83.14Gi ± ∞ ¹ +52.32% (p=0.008 n=5)
IndexByte/4M-16 59.76Gi ± ∞ ¹ 82.54Gi ± ∞ ¹ +38.12% (p=0.008 n=5)
IndexByte/64M-16 43.56Gi ± ∞ ¹ 54.84Gi ± ∞ ¹ +25.89% (p=0.008 n=5)
```
Change-Id: Iff3dfd542c55e7569242be81f38b2887b9e04e87
GitHub-Last-Rev: f309f898b1
GitHub-Pull-Request: golang/go#61792
Reviewed-on: https://go-review.googlesource.com/c/go/+/516435
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent
460dc37c88
commit
dfc1437580
|
|
@ -39,6 +39,7 @@ no_sse42:
|
|||
JA _3_or_more
|
||||
MOVW (R8), R8
|
||||
LEAQ -1(DI)(DX*1), DX
|
||||
PCALIGN $16
|
||||
loop2:
|
||||
MOVW (DI), SI
|
||||
CMPW SI,R8
|
||||
|
|
@ -250,6 +251,7 @@ sse42:
|
|||
LEAQ -15(DI)(DX*1), SI
|
||||
MOVQ $16, R9
|
||||
SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
|
||||
PCALIGN $16
|
||||
loop_sse42:
|
||||
// 0x0c means: unsigned byte compare (bits 0,1 are 00)
|
||||
// for equality (bits 2,3 are 11)
|
||||
|
|
|
|||
Loading…
Reference in New Issue