diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb index d602c2d1174..6cd05ded831 100644 --- a/lib/html_to_markdown.rb +++ b/lib/html_to_markdown.rb @@ -53,7 +53,7 @@ class HtmlToMarkdown doc.css("br.#{klass}").each do |br| parent = br.parent - if parent.description.block? + if block?(parent) br.remove_class(klass) else before, after = parent.children.slice_when { |n| n == br }.to_a @@ -194,7 +194,7 @@ class HtmlToMarkdown BLOCKS ||= %w{div tr} BLOCKS.each do |tag| define_method("visit_#{tag}") do |node| - prefix = node.previous_element&.description&.block? ? "" : "\n" + prefix = block?(node.previous_element) ? "" : "\n" "#{prefix}#{traverse(node)}\n" end end @@ -283,7 +283,7 @@ class HtmlToMarkdown LISTS ||= %w{ul ol} LISTS.each do |tag| define_method("visit_#{tag}") do |node| - prefix = node.previous_element&.description&.block? ? "" : "\n" + prefix = block?(node.previous_element) ? "" : "\n" suffix = node.ancestors("ul, ol, li").size > 0 ? "" : "\n" "#{prefix}#{traverse(node)}#{suffix}" end @@ -358,4 +358,9 @@ class HtmlToMarkdown node.text end + HTML5_BLOCK_ELEMENTS ||= %w[article aside details dialog figcaption figure footer header main nav section] + def block?(node) + return false if !node + node.description&.block? || HTML5_BLOCK_ELEMENTS.include?(node.name) + end end diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb index ea24eb8e66a..f95da5c8abb 100644 --- a/spec/components/html_to_markdown_spec.rb +++ b/spec/components/html_to_markdown_spec.rb @@ -38,9 +38,9 @@ describe HtmlToMarkdown do HTML markdown = <<~MD - Let me see if it happens by answering your message through Thunderbird. + Let me see if it happens by answering your message through Thunderbird. - Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 + Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 MD expect(html_to_markdown(html)).to eq(markdown.strip) @@ -70,13 +70,15 @@ describe HtmlToMarkdown do html = <<~HTML HTML markdown = <<~MD - > hello. + > Hello, + > is it me you're looking for? MD expect(html_to_markdown(html)).to eq(markdown.strip)