FIX: HtmlToMarkdown should not convert empty/bad <img> tags
This commit is contained in:
parent
c880af8120
commit
bff36de130
|
@ -134,6 +134,7 @@ class HtmlToMarkdown
|
|||
end
|
||||
|
||||
def visit_img(node)
|
||||
if is_valid_url?(node["src"])
|
||||
if @opts[:keep_img_tags]
|
||||
@stack[-1].markdown << node.to_html
|
||||
else
|
||||
|
@ -141,13 +142,13 @@ class HtmlToMarkdown
|
|||
@stack[-1].markdown << "![#{title}](#{node["src"]})"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def visit_a(node)
|
||||
href = node["href"]
|
||||
if href.present? && (href.start_with?("http") || href.start_with?("www."))
|
||||
if is_valid_url?(node["href"])
|
||||
@stack[-1].markdown << "["
|
||||
traverse(node)
|
||||
@stack[-1].markdown << "](#{href})"
|
||||
@stack[-1].markdown << "](#{node["href"]})"
|
||||
else
|
||||
traverse(node)
|
||||
end
|
||||
|
@ -203,4 +204,8 @@ class HtmlToMarkdown
|
|||
(lines + [""]).join("\n")
|
||||
end
|
||||
|
||||
# Decides whether +url+ (an attribute value such as an <img> src or an
# <a> href) is worth emitting as a Markdown link/image target.
#
# A value qualifies only when it is a String that begins with "http" or
# "www."; nil, empty, blank and relative values (e.g. "foo.bar") do not.
#
# @param url [String, nil] raw attribute value taken from the HTML node
# @return [Boolean] true when the value starts with an accepted prefix
def is_valid_url?(url)
  # A string that starts with one of the prefixes is necessarily non-blank,
  # so a type guard is all that is needed to reject nil and other
  # non-String input before calling start_with?.
  url.is_a?(String) && url.start_with?("http", "www.")
end
|
||||
|
||||
end
|
||||
|
|
|
@ -61,6 +61,12 @@ describe HtmlToMarkdown do
|
|||
expect(HtmlToMarkdown.new(HTML_WITH_IMG, keep_img_tags: true).to_markdown).to eq(HTML_WITH_IMG)
|
||||
end
|
||||
|
||||
# <img> tags with no src, an empty src, or a src that is not an accepted
# URL are expected to produce empty Markdown output.
it "removes empty & invalid <img>" do
  rejected_snippets = [
    %Q{<img>},
    %Q{<img src="">},
    %Q{<img src="foo.bar">},
  ]

  rejected_snippets.each do |snippet|
    expect(html_to_markdown(snippet)).to eq("")
  end
end
|
||||
|
||||
(1..6).each do |n|
|
||||
it "converts <h#{n}>" do
|
||||
expect(html_to_markdown("<h#{n}>Header #{n}</h#{n}>")).to eq("#" * n + " Header #{n}")
|
||||
|
|
Loading…
Reference in New Issue