mirror of https://github.com/golang/go.git
internal/lsp: update the fuzzy matcher to operate on chunks
We can avoid allocating strings when performing workspace symbol search
by having the fuzzy match operate directly on chunks.
When operating on a single string, this slows down the matcher slightly
(perhaps 10%) due to copying bytes rather than accessing the string
directly. We could work around this using unsafe, but this could also be
mitigated by generics.
Benchmark ("test" in x/tools): 48ms->46ms
Benchmark ("test" in kubernetes): 868ms->857ms
Change-Id: Icf0f15aaa5cc3c875cf157a7b90db801045d9ed4
Reviewed-on: https://go-review.googlesource.com/c/tools/+/338694
Trust: Robert Findley <rfindley@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
This commit is contained in:
parent
0d28b7d7c5
commit
15eebf7e82
|
|
@ -27,23 +27,23 @@ const (
|
|||
// RuneRoles detects the roles of each byte rune in an input string and stores it in the output
|
||||
// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string
|
||||
// or when it filled the output. If output is nil, then it gets created.
|
||||
func RuneRoles(str string, reuse []RuneRole) []RuneRole {
|
||||
func RuneRoles(candidate []byte, reuse []RuneRole) []RuneRole {
|
||||
var output []RuneRole
|
||||
if cap(reuse) < len(str) {
|
||||
output = make([]RuneRole, 0, len(str))
|
||||
if cap(reuse) < len(candidate) {
|
||||
output = make([]RuneRole, 0, len(candidate))
|
||||
} else {
|
||||
output = reuse[:0]
|
||||
}
|
||||
|
||||
prev, prev2 := rtNone, rtNone
|
||||
for i := 0; i < len(str); i++ {
|
||||
r := rune(str[i])
|
||||
for i := 0; i < len(candidate); i++ {
|
||||
r := rune(candidate[i])
|
||||
|
||||
role := RNone
|
||||
|
||||
curr := rtLower
|
||||
if str[i] <= unicode.MaxASCII {
|
||||
curr = runeType(rt[str[i]] - '0')
|
||||
if candidate[i] <= unicode.MaxASCII {
|
||||
curr = runeType(rt[candidate[i]] - '0')
|
||||
}
|
||||
|
||||
if curr == rtLower {
|
||||
|
|
@ -58,7 +58,7 @@ func RuneRoles(str string, reuse []RuneRole) []RuneRole {
|
|||
if prev == rtUpper {
|
||||
// This and previous characters are both upper case.
|
||||
|
||||
if i+1 == len(str) {
|
||||
if i+1 == len(candidate) {
|
||||
// This is last character, previous was also uppercase -> this is UCTail
|
||||
// i.e., (current char is C): aBC / BC / ABC
|
||||
role = RUCTail
|
||||
|
|
@ -118,11 +118,26 @@ func LastSegment(input string, roles []RuneRole) string {
|
|||
return input[start+1 : end+1]
|
||||
}
|
||||
|
||||
// ToLower transforms the input string to lower case, which is stored in the output byte slice.
|
||||
// fromChunks copies string chunks into the given buffer.
|
||||
func fromChunks(chunks []string, buffer []byte) []byte {
|
||||
ii := 0
|
||||
for _, chunk := range chunks {
|
||||
for i := 0; i < len(chunk); i++ {
|
||||
if ii >= cap(buffer) {
|
||||
break
|
||||
}
|
||||
buffer[ii] = chunk[i]
|
||||
ii++
|
||||
}
|
||||
}
|
||||
return buffer[:ii]
|
||||
}
|
||||
|
||||
// toLower transforms the input string to lower case, which is stored in the output byte slice.
|
||||
// The lower casing considers only ASCII values - non ASCII values are left unmodified.
|
||||
// Stops when parsed all input or when it filled the output slice. If output is nil, then it gets
|
||||
// created.
|
||||
func ToLower(input string, reuse []byte) []byte {
|
||||
func toLower(input []byte, reuse []byte) []byte {
|
||||
output := reuse
|
||||
if cap(reuse) < len(input) {
|
||||
output = make([]byte, len(input))
|
||||
|
|
@ -130,7 +145,7 @@ func ToLower(input string, reuse []byte) []byte {
|
|||
|
||||
for i := 0; i < len(input); i++ {
|
||||
r := rune(input[i])
|
||||
if r <= unicode.MaxASCII {
|
||||
if input[i] <= unicode.MaxASCII {
|
||||
if 'A' <= r && r <= 'Z' {
|
||||
r += 'a' - 'A'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ func rolesString(roles []fuzzy.RuneRole) string {
|
|||
func TestRoles(t *testing.T) {
|
||||
for _, tc := range rolesTests {
|
||||
gotRoles := make([]fuzzy.RuneRole, len(tc.str))
|
||||
fuzzy.RuneRoles(tc.str, gotRoles)
|
||||
fuzzy.RuneRoles([]byte(tc.str), gotRoles)
|
||||
got := rolesString(gotRoles)
|
||||
if got != tc.want {
|
||||
t.Errorf("roles(%s) = %v; want %v", tc.str, got, tc.want)
|
||||
|
|
@ -68,7 +68,7 @@ var wordSplitTests = []struct {
|
|||
|
||||
func TestWordSplit(t *testing.T) {
|
||||
for _, tc := range wordSplitTests {
|
||||
roles := fuzzy.RuneRoles(tc.input, nil)
|
||||
roles := fuzzy.RuneRoles([]byte(tc.input), nil)
|
||||
|
||||
var got []string
|
||||
consumer := func(i, j int) {
|
||||
|
|
@ -120,7 +120,7 @@ var lastSegmentSplitTests = []struct {
|
|||
|
||||
func TestLastSegment(t *testing.T) {
|
||||
for _, tc := range lastSegmentSplitTests {
|
||||
roles := fuzzy.RuneRoles(tc.str, nil)
|
||||
roles := fuzzy.RuneRoles([]byte(tc.str), nil)
|
||||
|
||||
got := fuzzy.LastSegment(tc.str, roles)
|
||||
|
||||
|
|
@ -135,7 +135,7 @@ func BenchmarkRoles(b *testing.B) {
|
|||
out := make([]fuzzy.RuneRole, len(str))
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
fuzzy.RuneRoles(str, out)
|
||||
fuzzy.RuneRoles([]byte(str), out)
|
||||
}
|
||||
b.SetBytes(int64(len(str)))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,8 +51,12 @@ type Matcher struct {
|
|||
lastCandidateLen int // in bytes
|
||||
lastCandidateMatched bool
|
||||
|
||||
// Here we save the last candidate in lower-case. This is basically a byte slice we reuse for
|
||||
// performance reasons, so the slice is not reallocated for every candidate.
|
||||
// Reusable buffers to avoid allocating for every candidate.
|
||||
// - inputBuf stores the concatenated input chunks
|
||||
// - lowerBuf stores the last candidate in lower-case
|
||||
// - rolesBuf stores the calculated roles for each rune in the last
|
||||
// candidate.
|
||||
inputBuf [MaxInputSize]byte
|
||||
lowerBuf [MaxInputSize]byte
|
||||
rolesBuf [MaxInputSize]RuneRole
|
||||
}
|
||||
|
|
@ -72,7 +76,7 @@ func NewMatcher(pattern string) *Matcher {
|
|||
|
||||
m := &Matcher{
|
||||
pattern: pattern,
|
||||
patternLower: ToLower(pattern, nil),
|
||||
patternLower: toLower([]byte(pattern), nil),
|
||||
}
|
||||
|
||||
for i, c := range m.patternLower {
|
||||
|
|
@ -88,7 +92,7 @@ func NewMatcher(pattern string) *Matcher {
|
|||
m.patternShort = m.patternLower
|
||||
}
|
||||
|
||||
m.patternRoles = RuneRoles(pattern, nil)
|
||||
m.patternRoles = RuneRoles([]byte(pattern), nil)
|
||||
|
||||
if len(pattern) > 0 {
|
||||
maxCharScore := 4
|
||||
|
|
@ -102,10 +106,15 @@ func NewMatcher(pattern string) *Matcher {
|
|||
// This is not designed for parallel use. Multiple candidates must be scored sequentially.
|
||||
// Returns a score between 0 and 1 (0 - no match, 1 - perfect match).
|
||||
func (m *Matcher) Score(candidate string) float32 {
|
||||
return m.ScoreChunks([]string{candidate})
|
||||
}
|
||||
|
||||
func (m *Matcher) ScoreChunks(chunks []string) float32 {
|
||||
candidate := fromChunks(chunks, m.inputBuf[:])
|
||||
if len(candidate) > MaxInputSize {
|
||||
candidate = candidate[:MaxInputSize]
|
||||
}
|
||||
lower := ToLower(candidate, m.lowerBuf[:])
|
||||
lower := toLower(candidate, m.lowerBuf[:])
|
||||
m.lastCandidateLen = len(candidate)
|
||||
|
||||
if len(m.pattern) == 0 {
|
||||
|
|
@ -174,7 +183,7 @@ func (m *Matcher) MatchedRanges() []int {
|
|||
return ret
|
||||
}
|
||||
|
||||
func (m *Matcher) match(candidate string, candidateLower []byte) bool {
|
||||
func (m *Matcher) match(candidate []byte, candidateLower []byte) bool {
|
||||
i, j := 0, 0
|
||||
for ; i < len(candidateLower) && j < len(m.patternLower); i++ {
|
||||
if candidateLower[i] == m.patternLower[j] {
|
||||
|
|
@ -192,7 +201,7 @@ func (m *Matcher) match(candidate string, candidateLower []byte) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (m *Matcher) computeScore(candidate string, candidateLower []byte) int {
|
||||
func (m *Matcher) computeScore(candidate []byte, candidateLower []byte) int {
|
||||
pattLen, candLen := len(m.pattern), len(candidate)
|
||||
|
||||
for j := 0; j <= len(m.pattern); j++ {
|
||||
|
|
|
|||
|
|
@ -233,8 +233,7 @@ func parseQuery(q string) matcherFunc {
|
|||
default:
|
||||
fm := fuzzy.NewMatcher(field)
|
||||
f = func(chunks []string) (int, float64) {
|
||||
s := strings.Join(chunks, "")
|
||||
score := float64(fm.Score(s))
|
||||
score := float64(fm.ScoreChunks(chunks))
|
||||
ranges := fm.MatchedRanges()
|
||||
if len(ranges) > 0 {
|
||||
return ranges[0], score
|
||||
|
|
|
|||
Loading…
Reference in New Issue