154 lines
3.4 KiB
Ruby
154 lines
3.4 KiB
Ruby
|
# Uses "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber, Gene Myers and Webb Miller
# in http://www.itu.dk/stud/speciale/bepjea/xwebtex/litt/an-onp-sequence-comparison-algorithm.pdf
|
||
|
# Computes an edit script between two sequences with the O(NP) sequence
# comparison algorithm (Wu, Manber, Myers and Miller).
#
# Both inputs must respond to +length+ and to integer +[]+ (for example two
# Strings or two Arrays); their elements are compared with +==+.
class ONPDiff
  # @param a [String, Array] the original sequence
  # @param b [String, Array] the modified sequence
  def initialize(a, b)
    @a = a
    @b = b
    @m = a.length
    @n = b.length
    @backtrack = []

    # The search below expects @a to be the shorter sequence. When the inputs
    # are swapped to guarantee that, remember it so :add and :delete can be
    # flipped back while building the edit script.
    @reverse = @m > @n
    if @reverse
      @a, @b = @b, @a
      @m, @n = @n, @m
    end

    @offset = @m + 1   # shifts a diagonal k (which may be negative) to an array index
    @delta = @n - @m   # diagonal on which the end point (@m, @n) lies
  end

  # Returns the element-by-element edit script.
  #
  # @return [Array<Array>] pairs of +[element, type]+ where type is
  #   +:common+, +:add+ or +:delete+ (memoized)
  def diff
    @diff ||= build_diff_script(compose)
  end

  # Returns the edit script with consecutive entries of the same type merged
  # into one entry whose element is built up with +<<+.
  #
  # @return [Array<Array>] pairs of +[merged_elements, type]+ (memoized)
  def short_diff
    @short_diff ||= build_short_diff_script(compose)
  end

  private

  # Runs the search and returns the way points of the shortest edit path as
  # +[x, y]+ pairs, ordered from the end point back to the start. The result
  # is memoized so +diff+ and +short_diff+ share a single search.
  def compose
    return @shortest_path if @shortest_path

    size = @m + @n + 3
    fp = Array.new(size, -1)      # furthest y reached so far on each diagonal
    @path = Array.new(size, -1)   # index into @backtrack of the last point per diagonal
    goal = @delta + @offset
    round = -1                    # the value called "p" in the paper

    loop do
      round += 1

      # Diagonals below delta are swept upwards, diagonals above delta are
      # swept downwards, and delta itself is handled last.
      (-round).upto(@delta - 1) { |k| extend_diagonal(fp, k) }
      (@delta + round).downto(@delta + 1) { |k| extend_diagonal(fp, k) }
      extend_diagonal(fp, @delta)

      break if fp[goal] == @n
    end

    @shortest_path = []
    link = @path[goal]
    until link == -1
      x, y, link = @backtrack[link]
      @shortest_path << [x, y]
    end

    @shortest_path
  end

  # Updates +fp+ for diagonal +k+ from its two neighbouring diagonals.
  def extend_diagonal(fp, k)
    fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
  end

  # Follows diagonal +k+ as long as the elements of both sequences match,
  # starting from the larger of the two candidate y values +p+ and +pp+.
  # Records the reached point in @backtrack (linked to the point it came
  # from) and returns the y value that was reached.
  def snake(k, p, pp)
    came_from = p > pp ? @path[k - 1 + @offset] : @path[k + 1 + @offset]
    y = [p, pp].max
    x = y - k

    while x < @m && y < @n && @a[x] == @b[y]
      x += 1
      y += 1
    end

    @path[k + @offset] = @backtrack.length
    @backtrack << [x, y, came_from]

    y
  end

  # Walks +shortest_path+ from the start point to the end point and yields
  # every single edit as +(element, type)+.
  def each_edit(shortest_path)
    px = 0
    py = 0

    shortest_path.reverse_each do |x, y|
      target_diagonal = y - x

      while px < x || py < y
        current_diagonal = py - px

        if target_diagonal > current_diagonal
          # Step along @b only; this is a deletion when the inputs were swapped.
          yield @b[py], (@reverse ? :delete : :add)
          py += 1
        elsif target_diagonal < current_diagonal
          # Step along @a only; this is an addition when the inputs were swapped.
          yield @a[px], (@reverse ? :add : :delete)
          px += 1
        else
          yield @a[px], :common
          px += 1
          py += 1
        end
      end
    end
  end

  # Builds the full edit script: one +[element, type]+ entry per element.
  def build_diff_script(shortest_path)
    ses = []
    each_edit(shortest_path) { |element, type| ses << [element, type] }
    ses
  end

  # Builds the condensed edit script: runs of the same type are merged by
  # appending every following element to the first one with +<<+.
  def build_short_diff_script(shortest_path)
    ses = []

    each_edit(shortest_path) do |element, type|
      current = ses.last

      if current && current[1] == type
        current[0] << element
      else
        # Start the run with a copy: appending to the element itself would
        # modify the caller's input (or raise if that element is frozen).
        ses << [element.dup, type]
      end
    end

    ses
  end
end
|