FIX: Remove newlines from img alt & title in HTML to markdown parser (#25473)
We were having a minor issue with emails with embedded images that had newlines in the alt string; for example: ``` <p class="MsoNormal"><span style="font-size:11.0pt"><img width="898" height="498" style="width:9.3541in;height:5.1875in" id="Picture_x0020_5" src="cid:image003.png@01DA4EBA.0400B610" alt="A screenshot of a computer program Description automatically generated"></span><span style="font-size:11.0pt"><o:p></o:p></span></p> ``` Once this was parsed and converted to markdown (or directly to HTML in some cases), this caused an issue in the composer and the post UI, where the markdown parser didn't know how to deal with this, making the HTML show directly instead of showing an image. The easiest way to deal with this is to just strip \n from image alt and title attrs in the HTMLToMarkdown class.
This commit is contained in:
parent
72eb29e5c3
commit
575bc4af73
|
@ -21,6 +21,10 @@ class HtmlToMarkdown
|
|||
|
||||
private
|
||||
|
||||
def strip_newlines(string)
|
||||
string.gsub(/\n/, " ")&.squeeze(" ")
|
||||
end
|
||||
|
||||
def remove_not_allowed!(doc)
|
||||
allowed = Set.new
|
||||
|
||||
|
@ -167,16 +171,18 @@ class HtmlToMarkdown
|
|||
def visit_img(node)
|
||||
return if node["src"].blank?
|
||||
|
||||
node["alt"] = strip_newlines(node["alt"]) if node["alt"].present?
|
||||
node["title"] = strip_newlines(node["title"]) if node["title"].present?
|
||||
|
||||
if @opts[:keep_img_tags]
|
||||
node.to_html
|
||||
elsif @opts[:keep_cid_imgs] && node["src"].start_with?("cid:")
|
||||
node.to_html
|
||||
elsif node["src"].start_with?(*ALLOWED_IMG_SRCS)
|
||||
title = node["alt"].presence || node["title"].presence
|
||||
width = node["width"].to_i
|
||||
height = node["height"].to_i
|
||||
dimensions = "|#{width}x#{height}" if width > 0 && height > 0
|
||||
"![#{title}#{dimensions}](#{node["src"]})"
|
||||
"![#{node["alt"] || node["title"]}#{dimensions}](#{node["src"]})"
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -150,6 +150,14 @@ RSpec.describe HtmlToMarkdown do
|
|||
expect(html_to_markdown(HTML_WITH_IMG, keep_img_tags: true)).to eq(HTML_WITH_IMG)
|
||||
end
|
||||
|
||||
it "removes newlines from img alt text" do
|
||||
html_with_alt_newlines =
|
||||
%Q{<img src="https://www.discourse.org/logo.svg" alt="Discourse\n\nLogo">}
|
||||
expect(html_to_markdown(html_with_alt_newlines)).to eq(
|
||||
"![Discourse Logo](https://www.discourse.org/logo.svg)",
|
||||
)
|
||||
end
|
||||
|
||||
it "removes empty & invalid <img>" do
|
||||
expect(html_to_markdown("<img>")).to eq("")
|
||||
expect(html_to_markdown(%Q{<img src="">})).to eq("")
|
||||
|
@ -160,6 +168,13 @@ RSpec.describe HtmlToMarkdown do
|
|||
expect(html_to_markdown(HTML_WITH_CID_IMG, keep_cid_imgs: true)).to eq(HTML_WITH_CID_IMG)
|
||||
end
|
||||
|
||||
it "removes newlines from img alt text with cid images" do
|
||||
html_with_cid_alt_newlines = %Q{<img src="cid:ii_1525434659ddb4cb" title="Discourse\n\nLogo">}
|
||||
expect(html_to_markdown(html_with_cid_alt_newlines, keep_cid_imgs: true)).to eq(
|
||||
%Q{<img src="cid:ii_1525434659ddb4cb" title="Discourse Logo">},
|
||||
)
|
||||
end
|
||||
|
||||
it "skips hidden <img>" do
|
||||
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" width=0>})).to eq("")
|
||||
expect(html_to_markdown(%Q{<img src="https://www.discourse.org/logo.svg" height="0">})).to eq(
|
||||
|
|
Loading…
Reference in New Issue