FIX: HtmlToMarkdown should not convert empty/bad <img> tags

This commit is contained in:
Régis Hanol 2017-05-03 18:29:25 +02:00
parent c880af8120
commit bff36de130
2 changed files with 19 additions and 8 deletions

View File

@ -134,20 +134,21 @@ class HtmlToMarkdown
end
def visit_img(node)
if @opts[:keep_img_tags]
@stack[-1].markdown << node.to_html
else
title = node["alt"].presence || node["title"].presence
@stack[-1].markdown << "![#{title}](#{node["src"]})"
if is_valid_url?(node["src"])
if @opts[:keep_img_tags]
@stack[-1].markdown << node.to_html
else
title = node["alt"].presence || node["title"].presence
@stack[-1].markdown << "![#{title}](#{node["src"]})"
end
end
end
def visit_a(node)
href = node["href"]
if href.present? && (href.start_with?("http") || href.start_with?("www."))
if is_valid_url?(node["href"])
@stack[-1].markdown << "["
traverse(node)
@stack[-1].markdown << "](#{href})"
@stack[-1].markdown << "](#{node["href"]})"
else
traverse(node)
end
@ -203,4 +204,8 @@ class HtmlToMarkdown
(lines + [""]).join("\n")
end
# Returns true when +url+ is present and begins with "http" or "www.".
# This is a prefix check only; the URL is not parsed or otherwise validated.
def is_valid_url?(url)
  return false unless url.present?

  url.start_with?("http", "www.")
end
end

View File

@ -61,6 +61,12 @@ describe HtmlToMarkdown do
expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
end
it "removes empty & invalid <img>" do
  # Each input must convert to an empty string: no src, a blank src,
  # and a src that starts with neither "http" nor "www.".
  [
    %Q{<img>},
    %Q{<img src="">},
    %Q{<img src="foo.bar">},
  ].each do |html|
    expect(html_to_markdown(html)).to eq("")
  end
end
(1..6).each do |n|
it "converts <h#{n}>" do
expect(html_to_markdown("<h#{n}>Header #{n}</h#{n}>")).to eq("#" * n + " Header #{n}")