mirror of https://github.com/golang/go.git
unicode: improve SimpleFold performance by 2x for non-foldable code points
Change SimpleFold to search the CaseRanges table only once when no
folding is specified for the rune (previously up to two searches could
be performed). This improves performance by 2x for runes that have no
folds or are already upper case. As a side effect this improves the
performance of To by roughly ~15%
goos: darwin
goarch: arm64
pkg: unicode
cpu: Apple M1 Max
│ base.10.txt │ new.10.txt │
│ sec/op │ sec/op vs base │
ToUpper-10 11.860n ± 1% 9.731n ± 1% -17.95% (p=0.000 n=10)
ToLower-10 12.31n ± 1% 10.34n ± 1% -16.00% (p=0.000 n=10)
SimpleFold/Upper-10 19.16n ± 0% 15.98n ± 1% -16.64% (p=0.000 n=10)
SimpleFold/Lower-10 32.41n ± 1% 17.09n ± 1% -47.27% (p=0.000 n=10)
SimpleFold/Fold-10 8.884n ± 4% 8.856n ± 8% ~ (p=0.700 n=10)
SimpleFold/NoFold-10 30.87n ± 0% 15.49n ± 3% -49.84% (p=0.000 n=10)
geomean 17.09n 12.47n -26.99%
Change-Id: I6e5c7554106842955aadeef7b266c4c7944d3a97
Reviewed-on: https://go-review.googlesource.com/c/go/+/454958
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
This commit is contained in:
parent
57f4cf2099
commit
b8e533a7cd
|
|
@ -206,34 +206,17 @@ func IsTitle(r rune) bool {
|
|||
return isExcludingLatin(Title, r)
|
||||
}
|
||||
|
||||
// to maps the rune using the specified case mapping.
|
||||
// It additionally reports whether caseRange contained a mapping for r.
|
||||
func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
|
||||
if _case < 0 || MaxCase <= _case {
|
||||
return ReplacementChar, false // as reasonable an error as any
|
||||
}
|
||||
// lookupCaseRange returns the CaseRange mapping for rune r or nil if no
|
||||
// mapping exists for r.
|
||||
func lookupCaseRange(r rune, caseRange []CaseRange) *CaseRange {
|
||||
// binary search over ranges
|
||||
lo := 0
|
||||
hi := len(caseRange)
|
||||
for lo < hi {
|
||||
m := int(uint(lo+hi) >> 1)
|
||||
cr := caseRange[m]
|
||||
cr := &caseRange[m]
|
||||
if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
|
||||
delta := cr.Delta[_case]
|
||||
if delta > MaxRune {
|
||||
// In an Upper-Lower sequence, which always starts with
|
||||
// an UpperCase letter, the real deltas always look like:
|
||||
// {0, 1, 0} UpperCase (Lower is next)
|
||||
// {-1, 0, -1} LowerCase (Upper, Title are previous)
|
||||
// The characters at even offsets from the beginning of the
|
||||
// sequence are upper case; the ones at odd offsets are lower.
|
||||
// The correct mapping can be done by clearing or setting the low
|
||||
// bit in the sequence offset.
|
||||
// The constants UpperCase and TitleCase are even while LowerCase
|
||||
// is odd so we take the low bit from _case.
|
||||
return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true
|
||||
}
|
||||
return r + delta, true
|
||||
return cr
|
||||
}
|
||||
if r < rune(cr.Lo) {
|
||||
hi = m
|
||||
|
|
@ -241,6 +224,37 @@ func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping
|
|||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// convertCase converts r to _case using CaseRange cr.
|
||||
func convertCase(_case int, r rune, cr *CaseRange) rune {
|
||||
delta := cr.Delta[_case]
|
||||
if delta > MaxRune {
|
||||
// In an Upper-Lower sequence, which always starts with
|
||||
// an UpperCase letter, the real deltas always look like:
|
||||
// {0, 1, 0} UpperCase (Lower is next)
|
||||
// {-1, 0, -1} LowerCase (Upper, Title are previous)
|
||||
// The characters at even offsets from the beginning of the
|
||||
// sequence are upper case; the ones at odd offsets are lower.
|
||||
// The correct mapping can be done by clearing or setting the low
|
||||
// bit in the sequence offset.
|
||||
// The constants UpperCase and TitleCase are even while LowerCase
|
||||
// is odd so we take the low bit from _case.
|
||||
return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1))
|
||||
}
|
||||
return r + delta
|
||||
}
|
||||
|
||||
// to maps the rune using the specified case mapping.
|
||||
// It additionally reports whether caseRange contained a mapping for r.
|
||||
func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
|
||||
if _case < 0 || MaxCase <= _case {
|
||||
return ReplacementChar, false // as reasonable an error as any
|
||||
}
|
||||
if cr := lookupCaseRange(r, caseRange); cr != nil {
|
||||
return convertCase(_case, r, cr), true
|
||||
}
|
||||
return r, false
|
||||
}
|
||||
|
||||
|
|
@ -364,8 +378,11 @@ func SimpleFold(r rune) rune {
|
|||
// No folding specified. This is a one- or two-element
|
||||
// equivalence class containing rune and ToLower(rune)
|
||||
// and ToUpper(rune) if they are different from rune.
|
||||
if l := ToLower(r); l != r {
|
||||
return l
|
||||
if cr := lookupCaseRange(r, CaseRanges); cr != nil {
|
||||
if l := convertCase(LowerCase, r, cr); l != r {
|
||||
return l
|
||||
}
|
||||
return convertCase(UpperCase, r, cr)
|
||||
}
|
||||
return ToUpper(r)
|
||||
return r
|
||||
}
|
||||
|
|
|
|||
|
|
@ -642,3 +642,29 @@ func TestNegativeRune(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkToUpper(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ToUpper('δ')
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkToLower(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = ToLower('Δ')
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSimpleFold(b *testing.B) {
|
||||
bench := func(name string, r rune) {
|
||||
b.Run(name, func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = SimpleFold(r)
|
||||
}
|
||||
})
|
||||
}
|
||||
bench("Upper", 'Δ')
|
||||
bench("Lower", 'δ')
|
||||
bench("Fold", '\u212A')
|
||||
bench("NoFold", '習')
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue