bytes, strings: speed up Split{,After}Seq

CL 669735 brought a welcome performance boost to splitSeq; however, it
rendered explodeSeq ineligible for inlining and failed to update that
function's doc comment.

This CL inlines the call to explodeSeq in splitSeq, thereby unlocking
a further speedup in the case of an empty separator, and removes
function explodeSeq altogether.

Some benchmark results:

goos: darwin
goarch: amd64
pkg: strings
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                                   │     old     │                 new                  │
                                   │   sec/op    │    sec/op     vs base                │
SplitSeqEmptySeparator-8             5.136m ± 6%    3.180m ± 6%  -38.09% (p=0.000 n=20)
SplitSeqSingleByteSeparator-8        995.9µ ± 1%    988.4µ ± 0%   -0.75% (p=0.000 n=20)
SplitSeqMultiByteSeparator-8         593.1µ ± 2%    591.7µ ± 1%        ~ (p=0.253 n=20)
SplitAfterSeqEmptySeparator-8        5.554m ± 3%    3.432m ± 2%  -38.20% (p=0.000 n=20)
SplitAfterSeqSingleByteSeparator-8   997.4µ ± 0%   1000.0µ ± 8%        ~ (p=0.121 n=20)
SplitAfterSeqMultiByteSeparator-8    591.7µ ± 1%    588.9µ ± 0%   -0.48% (p=0.004 n=20)
geomean                              1.466m         1.247m       -14.97%

                                   │     old      │                 new                 │
                                   │     B/op     │    B/op     vs base                 │
SplitSeqEmptySeparator-8             0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitSeqSingleByteSeparator-8        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitSeqMultiByteSeparator-8         0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqEmptySeparator-8        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqSingleByteSeparator-8   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqMultiByteSeparator-8    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
geomean                                         ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean
This commit is contained in:
Julien Cretel 2025-05-13 12:42:10 +02:00
parent a2fbb50322
commit 344934071f
No known key found for this signature in database
GPG Key ID: 9BC102DCCA7031A9
2 changed files with 14 additions and 24 deletions

View File

@@ -31,23 +31,18 @@ func Lines(s []byte) iter.Seq[[]byte] {
}
}
// explodeSeq returns an iterator over the runes in s.
func explodeSeq(s []byte, yield func([]byte) bool) {
for len(s) > 0 {
_, size := utf8.DecodeRune(s)
if !yield(s[:size:size]) {
return
}
s = s[size:]
}
}
// splitSeq is SplitSeq or SplitAfterSeq, configured by how many
// bytes of sep to include in the results (none or all).
func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
return func(yield func([]byte) bool) {
if len(sep) == 0 {
explodeSeq(s, yield)
for len(s) > 0 {
_, size := utf8.DecodeRune(s)
if !yield(s[:size:size]) {
return
}
s = s[size:]
}
return
}
for {

View File

@@ -31,23 +31,18 @@ func Lines(s string) iter.Seq[string] {
}
}
// explodeSeq returns an iterator over the runes in s.
func explodeSeq(s string, yield func(string) bool) {
for len(s) > 0 {
_, size := utf8.DecodeRuneInString(s)
if !yield(s[:size]) {
return
}
s = s[size:]
}
}
// splitSeq is SplitSeq or SplitAfterSeq, configured by how many
// bytes of sep to include in the results (none or all).
func splitSeq(s, sep string, sepSave int) iter.Seq[string] {
return func(yield func(string) bool) {
if len(sep) == 0 {
explodeSeq(s, yield)
for len(s) > 0 {
_, size := utf8.DecodeRuneInString(s)
if !yield(s[:size]) {
return
}
s = s[size:]
}
return
}
for {