mirror of https://github.com/golang/go.git
bytes, strings: add ASCII fast path to EqualFold
This commit adds an ASCII fast path to bytes/strings EqualFold that
roughly doubles performance when all characters are ASCII.

It also changes strings.EqualFold to use `for range` for the first
string since this is ~10% faster than using utf8.DecodeRuneInString for
both (see #31666).

Performance (similar results on arm64 and amd64):

name                        old time/op  new time/op  delta
EqualFold/Tests-10           238ns ± 0%   172ns ± 1%  -27.91%  (p=0.000 n=10+10)
EqualFold/ASCII-10          20.5ns ± 0%   9.7ns ± 0%  -52.73%  (p=0.000 n=10+10)
EqualFold/UnicodePrefix-10  86.5ns ± 0%  77.6ns ± 0%  -10.37%  (p=0.000 n=10+10)
EqualFold/UnicodeSuffix-10  86.8ns ± 2%  71.3ns ± 0%  -17.88%  (p=0.000 n=10+8)

Change-Id: I058f3f97a08dc04d65af895674d85420f920abe1
Reviewed-on: https://go-review.googlesource.com/c/go/+/425459
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
9c916c7901
commit
c70fd4b30a
|
|
@@ -1134,6 +1134,36 @@ func ReplaceAll(s, old, new []byte) []byte {
|
||||||
// are equal under simple Unicode case-folding, which is a more general
|
// are equal under simple Unicode case-folding, which is a more general
|
||||||
// form of case-insensitivity.
|
// form of case-insensitivity.
|
||||||
func EqualFold(s, t []byte) bool {
|
func EqualFold(s, t []byte) bool {
|
||||||
|
// ASCII fast path
|
||||||
|
i := 0
|
||||||
|
for ; i < len(s) && i < len(t); i++ {
|
||||||
|
sr := s[i]
|
||||||
|
tr := t[i]
|
||||||
|
if sr|tr >= utf8.RuneSelf {
|
||||||
|
goto hasUnicode
|
||||||
|
}
|
||||||
|
|
||||||
|
// Easy case.
|
||||||
|
if tr == sr {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sr < tr to simplify what follows.
|
||||||
|
if tr < sr {
|
||||||
|
tr, sr = sr, tr
|
||||||
|
}
|
||||||
|
// ASCII only, sr/tr must be upper/lower case
|
||||||
|
if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Check if we've exhausted both strings.
|
||||||
|
return len(s) == len(t)
|
||||||
|
|
||||||
|
hasUnicode:
|
||||||
|
s = s[i:]
|
||||||
|
t = t[i:]
|
||||||
for len(s) != 0 && len(t) != 0 {
|
for len(s) != 0 && len(t) != 0 {
|
||||||
// Extract first rune from each.
|
// Extract first rune from each.
|
||||||
var sr, tr rune
|
var sr, tr rune
|
||||||
|
|
|
||||||
|
|
@@ -1067,15 +1067,44 @@ func ReplaceAll(s, old, new string) string {
|
||||||
// are equal under simple Unicode case-folding, which is a more general
|
// are equal under simple Unicode case-folding, which is a more general
|
||||||
// form of case-insensitivity.
|
// form of case-insensitivity.
|
||||||
func EqualFold(s, t string) bool {
|
func EqualFold(s, t string) bool {
|
||||||
for s != "" && t != "" {
|
// ASCII fast path
|
||||||
// Extract first rune from each string.
|
i := 0
|
||||||
var sr, tr rune
|
for ; i < len(s) && i < len(t); i++ {
|
||||||
if s[0] < utf8.RuneSelf {
|
sr := s[i]
|
||||||
sr, s = rune(s[0]), s[1:]
|
tr := t[i]
|
||||||
} else {
|
if sr|tr >= utf8.RuneSelf {
|
||||||
r, size := utf8.DecodeRuneInString(s)
|
goto hasUnicode
|
||||||
sr, s = r, s[size:]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Easy case.
|
||||||
|
if tr == sr {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sr < tr to simplify what follows.
|
||||||
|
if tr < sr {
|
||||||
|
tr, sr = sr, tr
|
||||||
|
}
|
||||||
|
// ASCII only, sr/tr must be upper/lower case
|
||||||
|
if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Check if we've exhausted both strings.
|
||||||
|
return len(s) == len(t)
|
||||||
|
|
||||||
|
hasUnicode:
|
||||||
|
s = s[i:]
|
||||||
|
t = t[i:]
|
||||||
|
for _, sr := range s {
|
||||||
|
// If t is exhausted the strings are not equal.
|
||||||
|
if len(t) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract first rune from second string.
|
||||||
|
var tr rune
|
||||||
if t[0] < utf8.RuneSelf {
|
if t[0] < utf8.RuneSelf {
|
||||||
tr, t = rune(t[0]), t[1:]
|
tr, t = rune(t[0]), t[1:]
|
||||||
} else {
|
} else {
|
||||||
|
|
@@ -1115,8 +1144,8 @@ func EqualFold(s, t string) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// One string is empty. Are both?
|
// First string is empty, so check if the second one is also empty.
|
||||||
return s == t
|
return len(t) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
|
||||||
|
|
|
||||||
|
|
@@ -1556,6 +1556,7 @@ func TestEqualFold(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEqualFold(b *testing.B) {
|
func BenchmarkEqualFold(b *testing.B) {
|
||||||
|
b.Run("Tests", func(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
for _, tt := range EqualFoldTests {
|
for _, tt := range EqualFoldTests {
|
||||||
if out := EqualFold(tt.s, tt.t); out != tt.out {
|
if out := EqualFold(tt.s, tt.t); out != tt.out {
|
||||||
|
|
@@ -1563,6 +1564,28 @@ func BenchmarkEqualFold(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const s1 = "abcdefghijKz"
|
||||||
|
const s2 = "abcDefGhijKz"
|
||||||
|
|
||||||
|
b.Run("ASCII", func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
EqualFold(s1, s2)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("UnicodePrefix", func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
EqualFold("αβδ"+s1, "ΑΒΔ"+s2)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
b.Run("UnicodeSuffix", func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
EqualFold(s1+"αβδ", s2+"ΑΒΔ")
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
var CountTests = []struct {
|
var CountTests = []struct {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue