mirror of https://github.com/golang/go.git
120 lines
3.2 KiB
Go
120 lines
3.2 KiB
Go
// Copyright 2022 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package diff
|
|
|
|
import (
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/tools/internal/diff/lcs"
|
|
)
|
|
|
|
// maxDiffs is a limit on how deeply the lcs algorithm should search.
// The value is just a guess.
|
|
const maxDiffs = 30
|
|
|
|
// Strings computes the differences between two strings.
|
|
// (Both it and the diff in the myers package have type ComputeEdits, which
|
|
// is why the arguments are strings, not []bytes.)
|
|
// TODO(adonovan): opt: consider switching everything to []bytes, if
|
|
// that's the more common type in practice. Or provide both flavors?
|
|
func Strings(before, after string) []Edit {
|
|
if before == after {
|
|
// very frequently true
|
|
return nil
|
|
}
|
|
// The diffs returned by the lcs package use indexes into
|
|
// whatever slice was passed in. Edits use byte offsets, so
|
|
// rune or line offsets need to be converted.
|
|
// TODO(adonovan): opt: eliminate all the unnecessary allocations.
|
|
var diffs []lcs.Diff
|
|
if !isASCII(before) || !isASCII(after) {
|
|
diffs, _ = lcs.Compute([]rune(before), []rune(after), maxDiffs/2)
|
|
diffs = runeOffsets(diffs, []rune(before))
|
|
} else {
|
|
// Common case: pure ASCII. Avoid expansion to []rune slice.
|
|
diffs, _ = lcs.Compute([]byte(before), []byte(after), maxDiffs/2)
|
|
}
|
|
return convertDiffs(diffs)
|
|
}
|
|
|
|
// Lines computes the differences between two list of lines.
|
|
// TODO(adonovan): unused except by its test. Do we actually need it?
|
|
func Lines(before, after []string) []Edit {
|
|
diffs, _ := lcs.Compute(before, after, maxDiffs/2)
|
|
diffs = lineOffsets(diffs, before)
|
|
return convertDiffs(diffs)
|
|
// the code is not coping with possible missing \ns at the ends
|
|
}
|
|
|
|
func convertDiffs(diffs []lcs.Diff) []Edit {
|
|
ans := make([]Edit, len(diffs))
|
|
for i, d := range diffs {
|
|
ans[i] = Edit{d.Start, d.End, d.Text}
|
|
}
|
|
return ans
|
|
}
|
|
|
|
// convert diffs with rune offsets into diffs with byte offsets
|
|
func runeOffsets(diffs []lcs.Diff, src []rune) []lcs.Diff {
|
|
var idx int
|
|
var tmp strings.Builder // string because []byte([]rune) is illegal
|
|
for i, d := range diffs {
|
|
tmp.WriteString(string(src[idx:d.Start]))
|
|
v := tmp.Len()
|
|
tmp.WriteString(string(src[d.Start:d.End]))
|
|
d.Start = v
|
|
idx = d.End
|
|
d.End = tmp.Len()
|
|
diffs[i] = d
|
|
}
|
|
return diffs
|
|
}
|
|
|
|
// convert diffs with line offsets into diffs with byte offsets
|
|
func lineOffsets(diffs []lcs.Diff, src []string) []lcs.Diff {
|
|
var idx int
|
|
var tmp strings.Builder // bytes/
|
|
for i, d := range diffs {
|
|
tmp.WriteString(strJoin(src[idx:d.Start]))
|
|
v := tmp.Len()
|
|
tmp.WriteString(strJoin(src[d.Start:d.End]))
|
|
d.Start = v
|
|
idx = d.End
|
|
d.End = tmp.Len()
|
|
diffs[i] = d
|
|
}
|
|
return diffs
|
|
}
|
|
|
|
// strJoin returns the concatenation of elems with no separator between
// or after them, so any line terminators must already be part of the
// elements. It returns "" when elems is empty.
func strJoin(elems []string) string {
	return strings.Join(elems, "")
}
|
|
|
|
// isASCII reports whether every byte of s is below utf8.RuneSelf,
// i.e. whether s contains only ASCII. The empty string is ASCII.
func isASCII(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
|