PERF: only diff HTML / Markdown when needed (#30014)

When serializing the `body_changes` in the `PostRevisionSerializer`, we create two diffs: one for the `cooked` and another one for the `raw` version of the post.

Inside `DiscourseDiff`, we generate both `html` and `markdown` diffs when we only need the `html` diffs for the `cooked` version of the post and the `markdown` diff for the `raw` version of the post.

This solves the issue repored in https://meta.discourse.org/t/server-error-accessing-topic-revisions-on-a-specific-topic/339185 where some revisions would return 500 because of a `ArgumentError : Attributes per element limit exceeded` exception when trying to generate the `html` diff on a very large `raw`.
This commit is contained in:
Régis Hanol 2024-11-30 16:30:30 +01:00 committed by GitHub
parent bf3e75ca70
commit 20d46c9583
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 44 additions and 33 deletions

View File

@ -6,22 +6,15 @@ class DiscourseDiff
def initialize(before, after)
@before = before
@after = after
before_html = tokenize_html_blocks(@before)
after_html = tokenize_html_blocks(@after)
before_markdown = tokenize_line(CGI.escapeHTML(@before))
after_markdown = tokenize_line(CGI.escapeHTML(@after))
@block_by_block_diff = ONPDiff.new(before_html, after_html).paragraph_diff
@line_by_line_diff = ONPDiff.new(before_markdown, after_markdown).short_diff
end
def inline_html
i = 0
inline = []
while i < @block_by_block_diff.size
op_code = @block_by_block_diff[i][1]
while i < block_by_block_diff.size
op_code = block_by_block_diff[i][1]
if op_code == :common
inline << @block_by_block_diff[i][0]
inline << block_by_block_diff[i][0]
else
if op_code == :delete
opposite_op_code = :add
@ -35,16 +28,16 @@ class DiscourseDiff
second = i
end
if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
if i + 1 < block_by_block_diff.size && block_by_block_diff[i + 1][1] == opposite_op_code
diff =
ONPDiff.new(
tokenize_html(@block_by_block_diff[first][0]),
tokenize_html(@block_by_block_diff[second][0]),
tokenize_html(block_by_block_diff[first][0]),
tokenize_html(block_by_block_diff[second][0]),
).diff
inline << generate_inline_html(diff)
i += 1
else
inline << add_class_or_wrap_in_tags(@block_by_block_diff[i][0], klass)
inline << add_class_or_wrap_in_tags(block_by_block_diff[i][0], klass)
end
end
i += 1
@ -56,11 +49,11 @@ class DiscourseDiff
def side_by_side_html
i = 0
left, right = [], []
while i < @block_by_block_diff.size
op_code = @block_by_block_diff[i][1]
while i < block_by_block_diff.size
op_code = block_by_block_diff[i][1]
if op_code == :common
left << @block_by_block_diff[i][0]
right << @block_by_block_diff[i][0]
left << block_by_block_diff[i][0]
right << block_by_block_diff[i][0]
else
if op_code == :delete
opposite_op_code = :add
@ -76,18 +69,18 @@ class DiscourseDiff
second = i
end
if i + 1 < @block_by_block_diff.size && @block_by_block_diff[i + 1][1] == opposite_op_code
if i + 1 < block_by_block_diff.size && block_by_block_diff[i + 1][1] == opposite_op_code
diff =
ONPDiff.new(
tokenize_html(@block_by_block_diff[first][0]),
tokenize_html(@block_by_block_diff[second][0]),
tokenize_html(block_by_block_diff[first][0]),
tokenize_html(block_by_block_diff[second][0]),
).diff
deleted, inserted = generate_side_by_side_html(diff)
left << deleted
right << inserted
i += 1
else
side << add_class_or_wrap_in_tags(@block_by_block_diff[i][0], klass)
side << add_class_or_wrap_in_tags(block_by_block_diff[i][0], klass)
end
end
i += 1
@ -99,12 +92,12 @@ class DiscourseDiff
def side_by_side_markdown
i = 0
table = ["<table class=\"markdown\">"]
while i < @line_by_line_diff.size
while i < line_by_line_diff.size
table << "<tr>"
op_code = @line_by_line_diff[i][1]
op_code = line_by_line_diff[i][1]
if op_code == :common
table << "<td>#{@line_by_line_diff[i][0]}</td>"
table << "<td>#{@line_by_line_diff[i][0]}</td>"
table << "<td>#{line_by_line_diff[i][0]}</td>"
table << "<td>#{line_by_line_diff[i][0]}</td>"
else
if op_code == :delete
opposite_op_code = :add
@ -116,14 +109,14 @@ class DiscourseDiff
second = i
end
if i + 1 < @line_by_line_diff.size && @line_by_line_diff[i + 1][1] == opposite_op_code
if i + 1 < line_by_line_diff.size && line_by_line_diff[i + 1][1] == opposite_op_code
before_tokens, after_tokens =
tokenize_markdown(@line_by_line_diff[first][0]),
tokenize_markdown(@line_by_line_diff[second][0])
tokenize_markdown(line_by_line_diff[first][0]),
tokenize_markdown(line_by_line_diff[second][0])
if (before_tokens.size - after_tokens.size).abs > MAX_DIFFERENCE
before_tokens, after_tokens =
tokenize_line(@line_by_line_diff[first][0]),
tokenize_line(@line_by_line_diff[second][0])
tokenize_line(line_by_line_diff[first][0]),
tokenize_line(line_by_line_diff[second][0])
end
diff = ONPDiff.new(before_tokens, after_tokens).short_diff
deleted, inserted = generate_side_by_side_markdown(diff)
@ -132,11 +125,11 @@ class DiscourseDiff
i += 1
else
if op_code == :delete
table << "<td class=\"diff-del\">#{@line_by_line_diff[i][0]}</td>"
table << "<td class=\"diff-del\">#{line_by_line_diff[i][0]}</td>"
table << "<td></td>"
else
table << "<td></td>"
table << "<td class=\"diff-ins\">#{@line_by_line_diff[i][0]}</td>"
table << "<td class=\"diff-ins\">#{line_by_line_diff[i][0]}</td>"
end
end
end
@ -150,6 +143,24 @@ class DiscourseDiff
private
def block_by_block_diff
@block_by_block_diff ||=
begin
before_html = tokenize_html_blocks(@before)
after_html = tokenize_html_blocks(@after)
ONPDiff.new(before_html, after_html).paragraph_diff
end
end
def line_by_line_diff
@line_by_line_diff ||=
begin
before_markdown = tokenize_line(CGI.escapeHTML(@before))
after_markdown = tokenize_line(CGI.escapeHTML(@after))
ONPDiff.new(before_markdown, after_markdown).short_diff
end
end
def tokenize_line(text)
text.scan(/[^\r\n]+[\r\n]*/)
end