diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb
index d602c2d1174..6cd05ded831 100644
--- a/lib/html_to_markdown.rb
+++ b/lib/html_to_markdown.rb
@@ -53,7 +53,7 @@ class HtmlToMarkdown
doc.css("br.#{klass}").each do |br|
parent = br.parent
- if parent.description.block?
+ if block?(parent)
br.remove_class(klass)
else
before, after = parent.children.slice_when { |n| n == br }.to_a
@@ -194,7 +194,7 @@ class HtmlToMarkdown
BLOCKS ||= %w{div tr}
BLOCKS.each do |tag|
define_method("visit_#{tag}") do |node|
- prefix = node.previous_element&.description&.block? ? "" : "\n"
+ prefix = block?(node.previous_element) ? "" : "\n"
"#{prefix}#{traverse(node)}\n"
end
end
@@ -283,7 +283,7 @@ class HtmlToMarkdown
LISTS ||= %w{ul ol}
LISTS.each do |tag|
define_method("visit_#{tag}") do |node|
- prefix = node.previous_element&.description&.block? ? "" : "\n"
+ prefix = block?(node.previous_element) ? "" : "\n"
suffix = node.ancestors("ul, ol, li").size > 0 ? "" : "\n"
"#{prefix}#{traverse(node)}#{suffix}"
end
@@ -358,4 +358,9 @@ class HtmlToMarkdown
node.text
end
+ HTML5_BLOCK_ELEMENTS ||= %w[article aside details dialog figcaption figure footer header main nav section]
+ def block?(node)
+ return false if !node
+ node.description&.block? || HTML5_BLOCK_ELEMENTS.include?(node.name)
+ end
end
diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb
index ea24eb8e66a..f95da5c8abb 100644
--- a/spec/components/html_to_markdown_spec.rb
+++ b/spec/components/html_to_markdown_spec.rb
@@ -38,9 +38,9 @@ describe HtmlToMarkdown do
HTML
markdown = <<~MD
- Let me see if it happens by answering your message through Thunderbird.
+ Let me see if it happens by answering your message through Thunderbird.
- Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1
+ Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1
MD
expect(html_to_markdown(html)).to eq(markdown.strip)
@@ -70,13 +70,15 @@ describe HtmlToMarkdown do
html = <<~HTML
HTML
markdown = <<~MD
- > hello.
+ > Hello,
+ > is it me you're looking for?
MD
expect(html_to_markdown(html)).to eq(markdown.strip)