FIX: properly trim whitespace (including those pesky HTML entities)
This commit is contained in:
parent
dbb6e461aa
commit
c880af8120
|
@ -15,10 +15,10 @@ class HtmlToMarkdown
|
||||||
def remove_whitespaces!
|
def remove_whitespaces!
|
||||||
@doc.traverse do |node|
|
@doc.traverse do |node|
|
||||||
if node.is_a? Nokogiri::XML::Text
|
if node.is_a? Nokogiri::XML::Text
|
||||||
node.content = node.content.lstrip if node.previous_element&.description&.block?
|
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element&.description&.block?
|
||||||
node.content = node.content.lstrip if node.previous_element.nil? && node.parent.description&.block?
|
node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && node.parent.description&.block?
|
||||||
node.content = node.content.rstrip if node.next_element&.description&.block?
|
node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element&.description&.block?
|
||||||
node.content = node.content.rstrip if node.next_element.nil? && node.parent.description&.block?
|
node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element.nil? && node.parent.description&.block?
|
||||||
node.remove if node.content.empty?
|
node.remove if node.content.empty?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -7,6 +7,20 @@ describe HtmlToMarkdown do
|
||||||
HtmlToMarkdown.new(html).to_markdown
|
HtmlToMarkdown.new(html).to_markdown
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "remove whitespaces" do
|
||||||
|
expect(html_to_markdown(<<-HTML
|
||||||
|
<div dir="auto">Hello,
|
||||||
|
<div dir="auto"><br></div>
|
||||||
|
<div dir="auto"> This is the 1st paragraph. </div>
|
||||||
|
<div dir="auto"><br></div>
|
||||||
|
<div dir="auto">
|
||||||
|
This is another paragraph
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
HTML
|
||||||
|
)).to eq("Hello,\n\nThis is the 1st paragraph.\n\nThis is another paragraph")
|
||||||
|
end
|
||||||
|
|
||||||
it "converts <strong>" do
|
it "converts <strong>" do
|
||||||
expect(html_to_markdown("<strong>Strong</strong>")).to eq("**Strong**")
|
expect(html_to_markdown("<strong>Strong</strong>")).to eq("**Strong**")
|
||||||
expect(html_to_markdown("<strong>Str*ng</strong>")).to eq("__Str*ng__")
|
expect(html_to_markdown("<strong>Str*ng</strong>")).to eq("__Str*ng__")
|
||||||
|
|
Loading…
Reference in New Issue