mirror of https://github.com/golang/go.git
531 lines
14 KiB
Go
531 lines
14 KiB
Go
// Copyright 2022 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package lcs
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// non generic code. The names have Old at the end to indicate they are the
|
|
// the implementation that doesn't use generics.
|
|
|
|
// Compute the Diffs and the lcs.
|
|
func Compute(a, b interface{}, limit int) ([]Diff, lcs) {
|
|
var ans lcs
|
|
g := newegraph(a, b, limit)
|
|
ans = g.twosided()
|
|
diffs := g.fromlcs(ans)
|
|
return diffs, ans
|
|
}
|
|
|
|
// editGraph carries the information for computing the lcs for []byte, []rune, or []string.
|
|
type editGraph struct {
|
|
eq eq // how to compare elements of A, B, and convert slices to strings
|
|
vf, vb label // forward and backward labels
|
|
|
|
limit int // maximal value of D
|
|
// the bounding rectangle of the current edit graph
|
|
lx, ly, ux, uy int
|
|
delta int // common subexpression: (ux-lx)-(uy-ly)
|
|
}
|
|
|
|
// abstraction in place of generic
|
|
type eq interface {
|
|
eq(i, j int) bool
|
|
substr(i, j int) string // string from b[i:j]
|
|
lena() int
|
|
lenb() int
|
|
}
|
|
|
|
type byteeq struct {
|
|
a, b []byte // the input was ascii. perhaps these could be strings
|
|
}
|
|
|
|
func (x *byteeq) eq(i, j int) bool { return x.a[i] == x.b[j] }
|
|
func (x *byteeq) substr(i, j int) string { return string(x.b[i:j]) }
|
|
func (x *byteeq) lena() int { return int(len(x.a)) }
|
|
func (x *byteeq) lenb() int { return int(len(x.b)) }
|
|
|
|
type runeeq struct {
|
|
a, b []rune
|
|
}
|
|
|
|
func (x *runeeq) eq(i, j int) bool { return x.a[i] == x.b[j] }
|
|
func (x *runeeq) substr(i, j int) string { return string(x.b[i:j]) }
|
|
func (x *runeeq) lena() int { return int(len(x.a)) }
|
|
func (x *runeeq) lenb() int { return int(len(x.b)) }
|
|
|
|
type lineeq struct {
|
|
a, b []string
|
|
}
|
|
|
|
func (x *lineeq) eq(i, j int) bool { return x.a[i] == x.b[j] }
|
|
func (x *lineeq) substr(i, j int) string { return strings.Join(x.b[i:j], "") }
|
|
func (x *lineeq) lena() int { return int(len(x.a)) }
|
|
func (x *lineeq) lenb() int { return int(len(x.b)) }
|
|
|
|
func neweq(a, b interface{}) eq {
|
|
switch x := a.(type) {
|
|
case []byte:
|
|
return &byteeq{a: x, b: b.([]byte)}
|
|
case []rune:
|
|
return &runeeq{a: x, b: b.([]rune)}
|
|
case []string:
|
|
return &lineeq{a: x, b: b.([]string)}
|
|
default:
|
|
panic(fmt.Sprintf("unexpected type %T in neweq", x))
|
|
}
|
|
}
|
|
|
|
func (g *editGraph) fromlcs(lcs lcs) []Diff {
|
|
var ans []Diff
|
|
var pa, pb int // offsets in a, b
|
|
for _, l := range lcs {
|
|
if pa < l.X && pb < l.Y {
|
|
ans = append(ans, Diff{pa, l.X, g.eq.substr(pb, l.Y)})
|
|
} else if pa < l.X {
|
|
ans = append(ans, Diff{pa, l.X, ""})
|
|
} else if pb < l.Y {
|
|
ans = append(ans, Diff{pa, l.X, g.eq.substr(pb, l.Y)})
|
|
}
|
|
pa = l.X + l.Len
|
|
pb = l.Y + l.Len
|
|
}
|
|
if pa < g.eq.lena() && pb < g.eq.lenb() {
|
|
ans = append(ans, Diff{pa, g.eq.lena(), g.eq.substr(pb, g.eq.lenb())})
|
|
} else if pa < g.eq.lena() {
|
|
ans = append(ans, Diff{pa, g.eq.lena(), ""})
|
|
} else if pb < g.eq.lenb() {
|
|
ans = append(ans, Diff{pa, g.eq.lena(), g.eq.substr(pb, g.eq.lenb())})
|
|
}
|
|
return ans
|
|
}
|
|
|
|
func newegraph(a, b interface{}, limit int) *editGraph {
|
|
if limit <= 0 {
|
|
limit = 1 << 25 // effectively infinity
|
|
}
|
|
var alen, blen int
|
|
switch a := a.(type) {
|
|
case []byte:
|
|
alen, blen = len(a), len(b.([]byte))
|
|
case []rune:
|
|
alen, blen = len(a), len(b.([]rune))
|
|
case []string:
|
|
alen, blen = len(a), len(b.([]string))
|
|
default:
|
|
panic(fmt.Sprintf("unexpected type %T in newegraph", a))
|
|
}
|
|
ans := &editGraph{eq: neweq(a, b), vf: newtriang(limit), vb: newtriang(limit), limit: int(limit),
|
|
ux: alen, uy: blen, delta: alen - blen}
|
|
return ans
|
|
}
|
|
|
|
// --- FORWARD ---
|
|
// fdone decides if the forwward path has reached the upper right
|
|
// corner of the rectangele. If so, it also returns the computed lcs.
|
|
func (e *editGraph) fdone(D, k int) (bool, lcs) {
|
|
// x, y, k are relative to the rectangle
|
|
x := e.vf.get(D, k)
|
|
y := x - k
|
|
if x == e.ux && y == e.uy {
|
|
return true, e.forwardlcs(D, k)
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// run the forward algorithm, until success or up to the limit on D.
|
|
func (e *editGraph) forward() lcs {
|
|
e.setForward(0, 0, e.lx)
|
|
if ok, ans := e.fdone(0, 0); ok {
|
|
return ans
|
|
}
|
|
// from D to D+1
|
|
for D := 0; D < e.limit; D++ {
|
|
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
|
|
if ok, ans := e.fdone(D+1, -(D + 1)); ok {
|
|
return ans
|
|
}
|
|
e.setForward(D+1, D+1, e.getForward(D, D)+1)
|
|
if ok, ans := e.fdone(D+1, D+1); ok {
|
|
return ans
|
|
}
|
|
for k := -D + 1; k <= D-1; k += 2 {
|
|
// these are tricky and easy to get backwards
|
|
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
|
|
lookh := e.lookForward(k, e.getForward(D, k+1))
|
|
if lookv > lookh {
|
|
e.setForward(D+1, k, lookv)
|
|
} else {
|
|
e.setForward(D+1, k, lookh)
|
|
}
|
|
if ok, ans := e.fdone(D+1, k); ok {
|
|
return ans
|
|
}
|
|
}
|
|
}
|
|
// D is too large
|
|
// find the D path with maximal x+y inside the rectangle and
|
|
// use that to compute the found part of the lcs
|
|
kmax := -e.limit - 1
|
|
diagmax := -1
|
|
for k := -e.limit; k <= e.limit; k += 2 {
|
|
x := e.getForward(e.limit, k)
|
|
y := x - k
|
|
if x+y > diagmax && x <= e.ux && y <= e.uy {
|
|
diagmax, kmax = x+y, k
|
|
}
|
|
}
|
|
return e.forwardlcs(e.limit, kmax)
|
|
}
|
|
|
|
// recover the lcs by backtracking from the farthest point reached
|
|
func (e *editGraph) forwardlcs(D, k int) lcs {
|
|
var ans lcs
|
|
for x := e.getForward(D, k); x != 0 || x-k != 0; {
|
|
if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
|
|
// if (x-1,y) is labelled D-1, x--,D--,k--,continue
|
|
D, k, x = D-1, k-1, x-1
|
|
continue
|
|
} else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
|
|
// if (x,y-1) is labelled D-1, x, D--,k++, continue
|
|
D, k = D-1, k+1
|
|
continue
|
|
}
|
|
// if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue
|
|
y := x - k
|
|
realx, realy := x+e.lx, y+e.ly
|
|
if e.eq.eq(realx-1, realy-1) {
|
|
ans = prependlcs(ans, realx-1, realy-1)
|
|
x--
|
|
} else {
|
|
panic("broken path")
|
|
}
|
|
}
|
|
return ans
|
|
}
|
|
|
|
// start at (x,y), go up the diagonal as far as possible,
|
|
// and label the result with d
|
|
func (e *editGraph) lookForward(k, relx int) int {
|
|
rely := relx - k
|
|
x, y := relx+e.lx, rely+e.ly
|
|
for x < e.ux && y < e.uy && e.eq.eq(x, y) {
|
|
x++
|
|
y++
|
|
}
|
|
return x
|
|
}
|
|
|
|
func (e *editGraph) setForward(d, k, relx int) {
|
|
x := e.lookForward(k, relx)
|
|
e.vf.set(d, k, x-e.lx)
|
|
}
|
|
|
|
func (e *editGraph) getForward(d, k int) int {
|
|
x := e.vf.get(d, k)
|
|
return x
|
|
}
|
|
|
|
// --- BACKWARD ---
|
|
// bdone decides if the backward path has reached the lower left corner
|
|
func (e *editGraph) bdone(D, k int) (bool, lcs) {
|
|
// x, y, k are relative to the rectangle
|
|
x := e.vb.get(D, k)
|
|
y := x - (k + e.delta)
|
|
if x == 0 && y == 0 {
|
|
return true, e.backwardlcs(D, k)
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// run the backward algorithm, until success or up to the limit on D.
|
|
func (e *editGraph) backward() lcs {
|
|
e.setBackward(0, 0, e.ux)
|
|
if ok, ans := e.bdone(0, 0); ok {
|
|
return ans
|
|
}
|
|
// from D to D+1
|
|
for D := 0; D < e.limit; D++ {
|
|
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
|
|
if ok, ans := e.bdone(D+1, -(D + 1)); ok {
|
|
return ans
|
|
}
|
|
e.setBackward(D+1, D+1, e.getBackward(D, D))
|
|
if ok, ans := e.bdone(D+1, D+1); ok {
|
|
return ans
|
|
}
|
|
for k := -D + 1; k <= D-1; k += 2 {
|
|
// these are tricky and easy to get wrong
|
|
lookv := e.lookBackward(k, e.getBackward(D, k-1))
|
|
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
|
|
if lookv < lookh {
|
|
e.setBackward(D+1, k, lookv)
|
|
} else {
|
|
e.setBackward(D+1, k, lookh)
|
|
}
|
|
if ok, ans := e.bdone(D+1, k); ok {
|
|
return ans
|
|
}
|
|
}
|
|
}
|
|
|
|
// D is too large
|
|
// find the D path with minimal x+y inside the rectangle and
|
|
// use that to compute the part of the lcs found
|
|
kmax := -e.limit - 1
|
|
diagmin := 1 << 25
|
|
for k := -e.limit; k <= e.limit; k += 2 {
|
|
x := e.getBackward(e.limit, k)
|
|
y := x - (k + e.delta)
|
|
if x+y < diagmin && x >= 0 && y >= 0 {
|
|
diagmin, kmax = x+y, k
|
|
}
|
|
}
|
|
if kmax < -e.limit {
|
|
panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
|
|
}
|
|
return e.backwardlcs(e.limit, kmax)
|
|
}
|
|
|
|
// recover the lcs by backtracking
|
|
func (e *editGraph) backwardlcs(D, k int) lcs {
|
|
var ans lcs
|
|
for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
|
|
if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
|
|
// D--, k--, x unchanged
|
|
D, k = D-1, k-1
|
|
continue
|
|
} else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
|
|
// D--, k++, x++
|
|
D, k, x = D-1, k+1, x+1
|
|
continue
|
|
}
|
|
y := x - (k + e.delta)
|
|
realx, realy := x+e.lx, y+e.ly
|
|
if e.eq.eq(realx, realy) {
|
|
ans = appendlcs(ans, realx, realy)
|
|
x++
|
|
} else {
|
|
panic("broken path")
|
|
}
|
|
}
|
|
return ans
|
|
}
|
|
|
|
// start at (x,y), go down the diagonal as far as possible,
|
|
func (e *editGraph) lookBackward(k, relx int) int {
|
|
rely := relx - (k + e.delta) // forward k = k + e.delta
|
|
x, y := relx+e.lx, rely+e.ly
|
|
for x > 0 && y > 0 && e.eq.eq(x-1, y-1) {
|
|
x--
|
|
y--
|
|
}
|
|
return x
|
|
}
|
|
|
|
// convert to rectangle, and label the result with d
|
|
func (e *editGraph) setBackward(d, k, relx int) {
|
|
x := e.lookBackward(k, relx)
|
|
e.vb.set(d, k, x-e.lx)
|
|
}
|
|
|
|
func (e *editGraph) getBackward(d, k int) int {
|
|
x := e.vb.get(d, k)
|
|
return x
|
|
}
|
|
|
|
// -- TWOSIDED ---
|
|
|
|
func (e *editGraph) twosided() lcs {
|
|
// The termination condition could be improved, as either the forward
|
|
// or backward pass could succeed before Myers' Lemma applies.
|
|
// Aside from questions of efficiency (is the extra testing cost-effective)
|
|
// this is more likely to matter when e.limit is reached.
|
|
e.setForward(0, 0, e.lx)
|
|
e.setBackward(0, 0, e.ux)
|
|
|
|
// from D to D+1
|
|
for D := 0; D < e.limit; D++ {
|
|
// just finished a backwards pass, so check
|
|
if got, ok := e.twoDone(D, D); ok {
|
|
return e.twolcs(D, D, got)
|
|
}
|
|
// do a forwards pass (D to D+1)
|
|
e.setForward(D+1, -(D + 1), e.getForward(D, -D))
|
|
e.setForward(D+1, D+1, e.getForward(D, D)+1)
|
|
for k := -D + 1; k <= D-1; k += 2 {
|
|
// these are tricky and easy to get backwards
|
|
lookv := e.lookForward(k, e.getForward(D, k-1)+1)
|
|
lookh := e.lookForward(k, e.getForward(D, k+1))
|
|
if lookv > lookh {
|
|
e.setForward(D+1, k, lookv)
|
|
} else {
|
|
e.setForward(D+1, k, lookh)
|
|
}
|
|
}
|
|
// just did a forward pass, so check
|
|
if got, ok := e.twoDone(D+1, D); ok {
|
|
return e.twolcs(D+1, D, got)
|
|
}
|
|
// do a backward pass, D to D+1
|
|
e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
|
|
e.setBackward(D+1, D+1, e.getBackward(D, D))
|
|
for k := -D + 1; k <= D-1; k += 2 {
|
|
// these are tricky and easy to get wrong
|
|
lookv := e.lookBackward(k, e.getBackward(D, k-1))
|
|
lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
|
|
if lookv < lookh {
|
|
e.setBackward(D+1, k, lookv)
|
|
} else {
|
|
e.setBackward(D+1, k, lookh)
|
|
}
|
|
}
|
|
}
|
|
|
|
// D too large. combine a forward and backward partial lcs
|
|
// first, a forward one
|
|
kmax := -e.limit - 1
|
|
diagmax := -1
|
|
for k := -e.limit; k <= e.limit; k += 2 {
|
|
x := e.getForward(e.limit, k)
|
|
y := x - k
|
|
if x+y > diagmax && x <= e.ux && y <= e.uy {
|
|
diagmax, kmax = x+y, k
|
|
}
|
|
}
|
|
if kmax < -e.limit {
|
|
panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
|
|
}
|
|
lcs := e.forwardlcs(e.limit, kmax)
|
|
// now a backward one
|
|
// find the D path with minimal x+y inside the rectangle and
|
|
// use that to compute the lcs
|
|
diagmin := 1 << 25 // infinity
|
|
for k := -e.limit; k <= e.limit; k += 2 {
|
|
x := e.getBackward(e.limit, k)
|
|
y := x - (k + e.delta)
|
|
if x+y < diagmin && x >= 0 && y >= 0 {
|
|
diagmin, kmax = x+y, k
|
|
}
|
|
}
|
|
if kmax < -e.limit {
|
|
panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
|
|
}
|
|
lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
|
|
// These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs)
|
|
ans := lcs.fix()
|
|
return ans
|
|
}
|
|
|
|
// Does Myers' Lemma apply?
|
|
func (e *editGraph) twoDone(df, db int) (int, bool) {
|
|
if (df+db+e.delta)%2 != 0 {
|
|
return 0, false // diagonals cannot overlap
|
|
}
|
|
kmin := -db + e.delta
|
|
if -df > kmin {
|
|
kmin = -df
|
|
}
|
|
kmax := db + e.delta
|
|
if df < kmax {
|
|
kmax = df
|
|
}
|
|
for k := kmin; k <= kmax; k += 2 {
|
|
x := e.vf.get(df, k)
|
|
u := e.vb.get(db, k-e.delta)
|
|
if u <= x {
|
|
// is it worth looking at all the other k?
|
|
for l := k; l <= kmax; l += 2 {
|
|
x := e.vf.get(df, l)
|
|
y := x - l
|
|
u := e.vb.get(db, l-e.delta)
|
|
v := u - l
|
|
if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
|
|
return l, true
|
|
}
|
|
}
|
|
return k, true
|
|
}
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
func (e *editGraph) twolcs(df, db, kf int) lcs {
|
|
// db==df || db+1==df
|
|
x := e.vf.get(df, kf)
|
|
y := x - kf
|
|
kb := kf - e.delta
|
|
u := e.vb.get(db, kb)
|
|
v := u - kf
|
|
|
|
// Myers proved there is a df-path from (0,0) to (u,v)
|
|
// and a db-path from (x,y) to (N,M).
|
|
// In the first case the overall path is the forward path
|
|
// to (u,v) followed by the backward path to (N,M).
|
|
// In the second case the path is the backward path to (x,y)
|
|
// followed by the forward path to (x,y) from (0,0).
|
|
|
|
// Look for some special cases to avoid computing either of these paths.
|
|
if x == u {
|
|
// "babaab" "cccaba"
|
|
// already patched together
|
|
lcs := e.forwardlcs(df, kf)
|
|
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
|
return lcs.sort()
|
|
}
|
|
|
|
// is (u-1,v) or (u,v-1) labelled df-1?
|
|
// if so, that forward df-1-path plus a horizontal or vertical edge
|
|
// is the df-path to (u,v), then plus the db-path to (N,M)
|
|
if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
|
|
// "aabbab" "cbcabc"
|
|
lcs := e.forwardlcs(df-1, u-1-v)
|
|
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
|
return lcs.sort()
|
|
}
|
|
if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
|
|
// "abaabb" "bcacab"
|
|
lcs := e.forwardlcs(df-1, u-(v-1))
|
|
lcs = append(lcs, e.backwardlcs(db, kb)...)
|
|
return lcs.sort()
|
|
}
|
|
|
|
// The path can't possibly contribute to the lcs because it
|
|
// is all horizontal or vertical edges
|
|
if u == 0 || v == 0 || x == e.ux || y == e.uy {
|
|
// "abaabb" "abaaaa"
|
|
if u == 0 || v == 0 {
|
|
return e.backwardlcs(db, kb)
|
|
}
|
|
return e.forwardlcs(df, kf)
|
|
}
|
|
|
|
// is (x+1,y) or (x,y+1) labelled db-1?
|
|
if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
|
|
// "bababb" "baaabb"
|
|
lcs := e.backwardlcs(db-1, kb+1)
|
|
lcs = append(lcs, e.forwardlcs(df, kf)...)
|
|
return lcs.sort()
|
|
}
|
|
if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
|
|
// "abbbaa" "cabacc"
|
|
lcs := e.backwardlcs(db-1, kb-1)
|
|
lcs = append(lcs, e.forwardlcs(df, kf)...)
|
|
return lcs.sort()
|
|
}
|
|
|
|
// need to compute another path
|
|
// "aabbaa" "aacaba"
|
|
lcs := e.backwardlcs(db, kb)
|
|
oldx, oldy := e.ux, e.uy
|
|
e.ux = u
|
|
e.uy = v
|
|
lcs = append(lcs, e.forward()...)
|
|
e.ux, e.uy = oldx, oldy
|
|
return lcs.sort()
|
|
}
|