PERF: limit time spent diffing large blobs of text
REFACTOR: s/.length/.size/g
This commit is contained in:
parent
50e4ecc77e
commit
134a4c66b7
|
@ -21,7 +21,7 @@ class DiscourseDiff
|
||||||
def inline_html
|
def inline_html
|
||||||
i = 0
|
i = 0
|
||||||
inline = []
|
inline = []
|
||||||
while i < @block_by_block_diff.length
|
while i < @block_by_block_diff.size
|
||||||
op_code = @block_by_block_diff[i][1]
|
op_code = @block_by_block_diff[i][1]
|
||||||
if op_code == :common then inline << @block_by_block_diff[i][0]
|
if op_code == :common then inline << @block_by_block_diff[i][0]
|
||||||
else
|
else
|
||||||
|
@ -37,7 +37,7 @@ class DiscourseDiff
|
||||||
second = i
|
second = i
|
||||||
end
|
end
|
||||||
|
|
||||||
if i + 1 < @block_by_block_diff.length && @block_by_block_diff[i + 1][1] == opposite_op_code
|
if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
|
||||||
diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
|
diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
|
||||||
inline << generate_inline_html(diff)
|
inline << generate_inline_html(diff)
|
||||||
i += 1
|
i += 1
|
||||||
|
@ -54,7 +54,7 @@ class DiscourseDiff
|
||||||
def side_by_side_html
|
def side_by_side_html
|
||||||
i = 0
|
i = 0
|
||||||
left, right = [], []
|
left, right = [], []
|
||||||
while i < @block_by_block_diff.length
|
while i < @block_by_block_diff.size
|
||||||
op_code = @block_by_block_diff[i][1]
|
op_code = @block_by_block_diff[i][1]
|
||||||
if op_code == :common
|
if op_code == :common
|
||||||
left << @block_by_block_diff[i][0]
|
left << @block_by_block_diff[i][0]
|
||||||
|
@ -74,7 +74,7 @@ class DiscourseDiff
|
||||||
second = i
|
second = i
|
||||||
end
|
end
|
||||||
|
|
||||||
if i + 1 < @block_by_block_diff.length && @block_by_block_diff[i + 1][1] == opposite_op_code
|
if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
|
||||||
diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
|
diff = ONPDiff.new(tokenize_html(@block_by_block_diff[first][0]), tokenize_html(@block_by_block_diff[second][0])).diff
|
||||||
deleted, inserted = generate_side_by_side_html(diff)
|
deleted, inserted = generate_side_by_side_html(diff)
|
||||||
left << deleted
|
left << deleted
|
||||||
|
@ -93,7 +93,7 @@ class DiscourseDiff
|
||||||
def side_by_side_markdown
|
def side_by_side_markdown
|
||||||
i = 0
|
i = 0
|
||||||
table = ["<table class=\"markdown\">"]
|
table = ["<table class=\"markdown\">"]
|
||||||
while i < @line_by_line_diff.length
|
while i < @line_by_line_diff.size
|
||||||
table << "<tr>"
|
table << "<tr>"
|
||||||
op_code = @line_by_line_diff[i][1]
|
op_code = @line_by_line_diff[i][1]
|
||||||
if op_code == :common
|
if op_code == :common
|
||||||
|
@ -110,9 +110,9 @@ class DiscourseDiff
|
||||||
second = i
|
second = i
|
||||||
end
|
end
|
||||||
|
|
||||||
if i + 1 < @line_by_line_diff.length && @line_by_line_diff[i + 1][1] == opposite_op_code
|
if i + 1 < @line_by_line_diff.size && @line_by_line_diff[i + 1][1] == opposite_op_code
|
||||||
before_tokens, after_tokens = tokenize_markdown(@line_by_line_diff[first][0]), tokenize_markdown(@line_by_line_diff[second][0])
|
before_tokens, after_tokens = tokenize_markdown(@line_by_line_diff[first][0]), tokenize_markdown(@line_by_line_diff[second][0])
|
||||||
if (before_tokens.length - after_tokens.length).abs > MAX_DIFFERENCE
|
if (before_tokens.size - after_tokens.size).abs > MAX_DIFFERENCE
|
||||||
before_tokens, after_tokens = tokenize_line(@line_by_line_diff[first][0]), tokenize_line(@line_by_line_diff[second][0])
|
before_tokens, after_tokens = tokenize_line(@line_by_line_diff[first][0]), tokenize_line(@line_by_line_diff[second][0])
|
||||||
end
|
end
|
||||||
diff = ONPDiff.new(before_tokens, after_tokens).short_diff
|
diff = ONPDiff.new(before_tokens, after_tokens).short_diff
|
||||||
|
@ -147,25 +147,25 @@ class DiscourseDiff
|
||||||
def tokenize_markdown(text)
|
def tokenize_markdown(text)
|
||||||
t, tokens = [], []
|
t, tokens = [], []
|
||||||
i = 0
|
i = 0
|
||||||
while i < text.length
|
while i < text.size
|
||||||
if text[i] =~ /\w/
|
if text[i] =~ /\w/
|
||||||
t << text[i]
|
t << text[i]
|
||||||
elsif text[i] =~ /[ \t]/ && t.join =~ /^\w+$/
|
elsif text[i] =~ /[ \t]/ && t.join =~ /^\w+$/
|
||||||
begin
|
begin
|
||||||
t << text[i]
|
t << text[i]
|
||||||
i += 1
|
i += 1
|
||||||
end while i < text.length && text[i] =~ /[ \t]/
|
end while i < text.size && text[i] =~ /[ \t]/
|
||||||
i -= 1
|
i -= 1
|
||||||
tokens << t.join
|
tokens << t.join
|
||||||
t = []
|
t = []
|
||||||
else
|
else
|
||||||
tokens << t.join if t.length > 0
|
tokens << t.join if t.size > 0
|
||||||
tokens << text[i]
|
tokens << text[i]
|
||||||
t = []
|
t = []
|
||||||
end
|
end
|
||||||
i += 1
|
i += 1
|
||||||
end
|
end
|
||||||
tokens << t.join if t.length > 0
|
tokens << t.join if t.size > 0
|
||||||
tokens
|
tokens
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -180,7 +180,7 @@ class DiscourseDiff
|
||||||
def add_class_or_wrap_in_tags(html_or_text, klass)
|
def add_class_or_wrap_in_tags(html_or_text, klass)
|
||||||
result = html_or_text.dup
|
result = html_or_text.dup
|
||||||
index_of_next_chevron = result.index(">")
|
index_of_next_chevron = result.index(">")
|
||||||
if result.length > 0 && result[0] == '<' && index_of_next_chevron
|
if result.size > 0 && result[0] == '<' && index_of_next_chevron
|
||||||
index_of_class = result.index("class=")
|
index_of_class = result.index("class=")
|
||||||
if index_of_class.nil? || index_of_class > index_of_next_chevron
|
if index_of_class.nil? || index_of_class > index_of_next_chevron
|
||||||
# we do not have a class for the current tag
|
# we do not have a class for the current tag
|
||||||
|
@ -192,7 +192,7 @@ class DiscourseDiff
|
||||||
if classes.include?("diff-#{klass}")
|
if classes.include?("diff-#{klass}")
|
||||||
result
|
result
|
||||||
else
|
else
|
||||||
result.insert(index_of_class + "class=".length + 1, "diff-#{klass} ")
|
result.insert(index_of_class + "class=".size + 1, "diff-#{klass} ")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
# Use "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber and Gene Myers
|
# Use "An O(NP) Sequence Comparison Algorithm" as described by Sun Wu, Udi Manber and Gene Myers
|
||||||
# in http://www.itu.dk/stud/speciale/bepjea/xwebtex/litt/an-onp-sequence-comparison-algorithm.pdf
|
# in https://publications.mpi-cbg.de/Wu_1990_6334.pdf
|
||||||
|
|
||||||
class ONPDiff
|
class ONPDiff
|
||||||
|
|
||||||
def initialize(a, b)
|
def initialize(a, b)
|
||||||
@a, @b = a, b
|
@a, @b = a, b
|
||||||
@m, @n = a.length, b.length
|
@m, @n = a.size, b.size
|
||||||
@backtrack = []
|
@backtrack = []
|
||||||
if @reverse = @m > @n
|
if @reverse = @m > @n
|
||||||
@a, @b = @b, @a
|
@a, @b = @b, @a
|
||||||
|
@ -30,13 +31,15 @@ class ONPDiff
|
||||||
return @shortest_path if @shortest_path
|
return @shortest_path if @shortest_path
|
||||||
|
|
||||||
size = @m + @n + 3
|
size = @m + @n + 3
|
||||||
fp = Array.new(size) { |i| -1 }
|
fp = Array.new(size, -1)
|
||||||
@path = Array.new(size) { |i| -1 }
|
@path = Array.new(size, -1)
|
||||||
p = -1
|
p = -1
|
||||||
|
|
||||||
begin
|
begin
|
||||||
p += 1
|
p += 1
|
||||||
|
|
||||||
|
return (@shortest_path = []) if p >= 1000
|
||||||
|
|
||||||
k = -p
|
k = -p
|
||||||
while k <= @delta - 1
|
while k <= @delta - 1
|
||||||
fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
|
fp[k + @offset] = snake(k, fp[k - 1 + @offset] + 1, fp[k + 1 + @offset])
|
||||||
|
@ -56,6 +59,7 @@ class ONPDiff
|
||||||
r = @path[@delta + @offset]
|
r = @path[@delta + @offset]
|
||||||
|
|
||||||
@shortest_path = []
|
@shortest_path = []
|
||||||
|
|
||||||
while r != -1
|
while r != -1
|
||||||
@shortest_path << [@backtrack[r][0], @backtrack[r][1]]
|
@shortest_path << [@backtrack[r][0], @backtrack[r][1]]
|
||||||
r = @backtrack[r][2]
|
r = @backtrack[r][2]
|
||||||
|
@ -74,7 +78,7 @@ class ONPDiff
|
||||||
y += 1
|
y += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
@path[k + @offset] = @backtrack.length
|
@path[k + @offset] = @backtrack.size
|
||||||
@backtrack << [x, y, r]
|
@backtrack << [x, y, r]
|
||||||
|
|
||||||
y
|
y
|
||||||
|
@ -84,7 +88,7 @@ class ONPDiff
|
||||||
ses = []
|
ses = []
|
||||||
x, y = 1, 1
|
x, y = 1, 1
|
||||||
px, py = 0, 0
|
px, py = 0, 0
|
||||||
i = shortest_path.length - 1
|
i = shortest_path.size - 1
|
||||||
while i >= 0
|
while i >= 0
|
||||||
while px < shortest_path[i][0] || py < shortest_path[i][1]
|
while px < shortest_path[i][0] || py < shortest_path[i][1]
|
||||||
if shortest_path[i][1] - shortest_path[i][0] > py - px
|
if shortest_path[i][1] - shortest_path[i][0] > py - px
|
||||||
|
@ -114,12 +118,12 @@ class ONPDiff
|
||||||
ses = []
|
ses = []
|
||||||
x, y = 1, 1
|
x, y = 1, 1
|
||||||
px, py = 0, 0
|
px, py = 0, 0
|
||||||
i = shortest_path.length - 1
|
i = shortest_path.size - 1
|
||||||
while i >= 0
|
while i >= 0
|
||||||
while px < shortest_path[i][0] || py < shortest_path[i][1]
|
while px < shortest_path[i][0] || py < shortest_path[i][1]
|
||||||
if shortest_path[i][1] - shortest_path[i][0] > py - px
|
if shortest_path[i][1] - shortest_path[i][0] > py - px
|
||||||
t = @reverse ? :delete : :add
|
t = @reverse ? :delete : :add
|
||||||
if ses.length > 0 && ses[-1][1] == t
|
if ses.size > 0 && ses[-1][1] == t
|
||||||
ses[-1][0] << @b[py]
|
ses[-1][0] << @b[py]
|
||||||
else
|
else
|
||||||
ses << [@b[py], t]
|
ses << [@b[py], t]
|
||||||
|
@ -128,7 +132,7 @@ class ONPDiff
|
||||||
py += 1
|
py += 1
|
||||||
elsif shortest_path[i][1] - shortest_path[i][0] < py - px
|
elsif shortest_path[i][1] - shortest_path[i][0] < py - px
|
||||||
t = @reverse ? :add : :delete
|
t = @reverse ? :add : :delete
|
||||||
if ses.length > 0 && ses[-1][1] == t
|
if ses.size > 0 && ses[-1][1] == t
|
||||||
ses[-1][0] << @a[px]
|
ses[-1][0] << @a[px]
|
||||||
else
|
else
|
||||||
ses << [@a[px], t]
|
ses << [@a[px], t]
|
||||||
|
@ -136,7 +140,7 @@ class ONPDiff
|
||||||
x += 1
|
x += 1
|
||||||
px += 1
|
px += 1
|
||||||
else
|
else
|
||||||
if ses.length > 0 && ses[-1][1] == :common
|
if ses.size > 0 && ses[-1][1] == :common
|
||||||
ses[-1][0] << @a[px]
|
ses[-1][0] << @a[px]
|
||||||
else
|
else
|
||||||
ses << [@a[px], :common]
|
ses << [@a[px], :common]
|
||||||
|
|
|
@ -299,7 +299,7 @@ class PostRevisor
|
||||||
max_diff = SiteSetting.editing_grace_period_max_diff_high_trust.to_i
|
max_diff = SiteSetting.editing_grace_period_max_diff_high_trust.to_i
|
||||||
end
|
end
|
||||||
|
|
||||||
if (original_raw.length - new_raw.length).abs > max_diff ||
|
if (original_raw.size - new_raw.size).abs > max_diff ||
|
||||||
diff_size(original_raw, new_raw) > max_diff
|
diff_size(original_raw, new_raw) > max_diff
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
|
@ -15,6 +15,12 @@ describe ONPDiff do
|
||||||
expect(ONPDiff.new("abcd", "abef").diff).to eq([["a", :common], ["b", :common], ["e", :add], ["f", :add], ["c", :delete], ["d", :delete]])
|
expect(ONPDiff.new("abcd", "abef").diff).to eq([["a", :common], ["b", :common], ["e", :add], ["f", :add], ["c", :delete], ["d", :delete]])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "bails out on large diffs" do
|
||||||
|
a = SecureRandom.alphanumeric(5_000)
|
||||||
|
b = SecureRandom.alphanumeric(5_000)
|
||||||
|
expect(ONPDiff.new(a, b).diff).to eq([])
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "short_diff" do
|
describe "short_diff" do
|
||||||
|
|
Loading…
Reference in New Issue