# frozen_string_literal: true require 'rails_helper' require 'html_to_markdown' describe HtmlToMarkdown do def html_to_markdown(html, opts = {}) HtmlToMarkdown.new(html, opts).to_markdown end it "remove whitespaces" do html = <<-HTML
Let me see if it happens by answering your message through Thunderbird.
Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1
HTML markdown = <<~MD Let me see if it happens by answering your message through Thunderbird. Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 Long sentence 1 MD expect(html_to_markdown(html)).to eq(markdown.strip) html = <<~HTML This post
has lots
of
space
This space was left untouched !HTML markdown = <<~MD This post has lots of space ``` This space was left untouched ! ``` MD expect(html_to_markdown(html)).to eq(markdown.strip) end it "doesn't error on non-inline elements like (aside, section)" do html = <<~HTML HTML markdown = <<~MD > hello. MD expect(html_to_markdown(html)).to eq(markdown.strip) end it "skips hidden tags" do expect(html_to_markdown(%Q{
Hello
})).to eq("Hello World!") expect(html_to_markdown(%Q{ World!Hello cruel World!
})).to eq("Hello World!") end it "converts " do expect(html_to_markdown("Strong")).to eq("**Strong**") expect(html_to_markdown("Str*ng")).to eq("__Str*ng__") end it "converts " do expect(html_to_markdown("Bold")).to eq("**Bold**") expect(html_to_markdown("B*ld")).to eq("__B*ld__") html = <<~HTML BeforeBold
if next character is \n" do expect(html_to_markdown("
Before
\nInside
After
" do
expect(html_to_markdown("Code
")).to eq("`Code`")
end
# Expects <ins> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <ins>" do
  html = "This is an <ins>insertion</ins>"

  expect(html_to_markdown(html)).to eq("This is an <ins>insertion</ins>")
end
# Expects <del> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <del>" do
  html = "This is a <del>deletion</del>"

  expect(html_to_markdown(html)).to eq("This is a <del>deletion</del>")
end
# Expects <sub> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <sub>" do
  html = "H<sub>2</sub>O"

  expect(html_to_markdown(html)).to eq("H<sub>2</sub>O")
end
# Expects <mark> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed (the exact span that was highlighted is assumed); confirm
# against the upstream spec.
it "supports <mark>" do
  html = "<mark>This is highlighted!</mark>"

  expect(html_to_markdown(html)).to eq("<mark>This is highlighted!</mark>")
end
# Expects <sup> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <sup>" do
  html = "<sup>Super Script!</sup>"

  expect(html_to_markdown(html)).to eq("<sup>Super Script!</sup>")
end
# Expects <small> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <small>" do
  html = "<small>Small</small>"

  expect(html_to_markdown(html)).to eq("<small>Small</small>")
end
# Expects <kbd> markup to pass through the converter unchanged.
# NOTE(review): the tags were stripped from this example; wrapping each key
# separately is an assumption — confirm against the upstream spec.
it "supports <kbd>" do
  html = "<kbd>CTRL</kbd>+<kbd>C</kbd>"

  expect(html_to_markdown(html)).to eq("<kbd>CTRL</kbd>+<kbd>C</kbd>")
end
# Expects <abbr>, including its title attribute, to pass through unchanged.
# NOTE(review): the element and its attribute were stripped from this example;
# the title text below is an assumption — confirm against the upstream spec.
it "supports <abbr>" do
  html = %Q{<abbr title="Civilized Discourse Construction Kit, Inc.">CDCK</abbr>}

  expect(html_to_markdown(html)).to eq(%Q{<abbr title="Civilized Discourse Construction Kit, Inc.">CDCK</abbr>})
end
# <s> is expected to become Markdown strikethrough (~~text~~).
# NOTE(review): the tags were stripped from this example; which of <s> and
# <strike> belongs to this example vs. its sibling is assumed.
it "supports <s>" do
  expect(html_to_markdown("<s>Strike Through</s>")).to eq("~~Strike Through~~")
end
# <strike> is expected to become Markdown strikethrough (~~text~~).
# NOTE(review): the tags were stripped from this example; which of <s> and
# <strike> belongs to this example vs. its sibling is assumed.
it "supports <strike>" do
  expect(html_to_markdown("<strike>Strike Through</strike>")).to eq("~~Strike Through~~")
end
# <blockquote> is expected to become a "> "-prefixed Markdown quote.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <blockquote>" do
  expect(html_to_markdown("<blockquote>Quote</blockquote>")).to eq("> Quote")
end
# <ul>/<li> is expected to become a "- " bullet list, with or without
# newlines between the source elements.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <ul>" do
  bullets = "- 🍏\n- 🍐\n- 🍌"

  compact = "<ul><li>🍏</li><li>🍐</li><li>🍌</li></ul>"
  expect(html_to_markdown(compact)).to eq(bullets)

  # Same list, but with literal newlines between the elements.
  spaced = "<ul>\n<li>🍏</li>\n<li>🍐</li>\n<li>🍌</li>\n</ul>"
  expect(html_to_markdown(spaced)).to eq(bullets)
end
# <ol>/<li> is expected to become a numbered list where every item uses "1.".
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <ol>" do
  html = "<ol><li>🍆</li><li>🍅</li><li>🍄</li></ol>"

  expect(html_to_markdown(html)).to eq("1. 🍆\n1. 🍅\n1. 🍄")
end
# Paragraphs inside list items are expected to yield a "loose" list:
# bullets separated by a blank line.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports <p> inside <li>" do
  html = "<ul><li><p>🍏</p></li><li><p>🍐</p></li><li><p>🍌</p></li></ul>"

  expect(html_to_markdown(html)).to eq("- 🍏\n\n- 🍐\n\n- 🍌")
end
# Nested <ul> elements are expected to become indented sub-lists.
# NOTE(review): the tags were stripped from this example and whitespace runs
# were collapsed. The markup is reconstructed, and the nested bullets are
# written with a two-space indent (a one-space indent would not nest in
# Markdown) — confirm both against the upstream spec.
it "supports <ul> inside <ul>" do
  html = <<~HTML
    <ul>
      <li>Fruits
        <ul>
          <li>🍏</li>
          <li>🍐</li>
          <li>🍌</li>
        </ul>
      </li>
      <li>Vegetables
        <ul>
          <li>🍆</li>
          <li>🍅</li>
          <li>🍄</li>
        </ul>
      </li>
    </ul>
  HTML

  expect(html_to_markdown(html)).to eq("- Fruits\n  - 🍏\n  - 🍐\n  - 🍌\n- Vegetables\n  - 🍆\n  - 🍅\n  - 🍄")
end
# An <li> without an enclosing list is still expected to become a bullet.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports bare <li>" do
  expect(html_to_markdown("<li>I'm alone</li>")).to eq("- I'm alone")
end
# <pre> (optionally wrapping <code>) is expected to become a fenced code
# block, keeping the content's own whitespace and picking up a language hint.
# NOTE(review): the tags and attributes were stripped from this example. The
# `lang-javascript` class is an assumption about how the language is
# declared, and the indentation inside the last sample looks collapsed —
# confirm against the upstream spec.
it "supports <pre>" do
  fenced = "```\nvar foo = 'bar';\n```"

  expect(html_to_markdown("<pre>var foo = 'bar';</pre>")).to eq(fenced)
  expect(html_to_markdown("<pre><code>var foo = 'bar';</code></pre>")).to eq(fenced)

  with_lang = %Q{<pre><code class="lang-javascript">var foo = 'bar';</code></pre>}
  expect(html_to_markdown(with_lang)).to eq("```javascript\nvar foo = 'bar';\n```")

  # Leading whitespace inside <pre> must survive untouched.
  indented = "<pre> function f() {\n console.log('Hello world!');\n }</pre>"
  expect(html_to_markdown(indented)).to eq("```\n function f() {\n console.log('Hello world!');\n }\n```")
end
# A code block nested in a quote is expected to have every fence/content line
# prefixed with "> ".
# NOTE(review): the tags were stripped from this example; whether the original
# wrapped the content in <code> as well is assumed — confirm upstream.
it "supports <pre> inside <blockquote>" do
  html = "<blockquote><pre><code>var foo = 'bar';</code></pre></blockquote>"

  expect(html_to_markdown(html)).to eq("> ```\n> var foo = 'bar';\n> ```")
end
# A blockquote nested in a list item is expected to be indented under the
# bullet, with inline formatting and line breaks preserved.
# NOTE(review): the tags were stripped from this example and whitespace runs
# were collapsed. The markup is reconstructed and the quote is written with a
# two-space indent so it nests under the bullet — confirm against upstream.
it "works" do
  html = "<ul><li><p>A list item with a blockquote:</p><blockquote><p>This is a <strong>blockquote</strong><br>inside a list item.</p></blockquote></li></ul>"

  expect(html_to_markdown(html)).to eq("- A list item with a blockquote:\n\n  > This is a **blockquote**\n  > inside a list item.")
end
# A complete document (<html>/<body> wrappers) is expected to convert like a
# fragment.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "supports html document" do
  html = "<html><body>Hello<div>World</div></body></html>"

  expect(html_to_markdown(html)).to eq("Hello\nWorld")
end
# Paragraphs are expected to be separated by a blank line, with whitespace
# inside a paragraph collapsed to single spaces.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "handles <p>" do
  expect(html_to_markdown("<p>1st paragraph</p><p>2nd paragraph</p>")).to eq("1st paragraph\n\n2nd paragraph")

  multiline = "<p>1st paragraph</p>\n<p> 2nd paragraph\n 2nd paragraph</p>\n<p>3rd paragraph</p>"
  expect(html_to_markdown(multiline)).to eq("1st paragraph\n\n2nd paragraph 2nd paragraph\n\n3rd paragraph")
end
# Consecutive <div>s are expected to be separated by a single newline.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "handles <div>" do
  html = "<div>1st div</div><div>2nd div</div>"

  expect(html_to_markdown(html)).to eq("1st div\n2nd div")
end
# <span> is expected to be dropped, keeping only its text.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "swallows <span>" do
  expect(html_to_markdown("<span>Span</span>")).to eq("Span")
end
# <u> is expected to be dropped, keeping only its text.
# NOTE(review): the tags were stripped from this example and have been
# reconstructed; confirm against the upstream spec.
it "swallows <u>" do
  expect(html_to_markdown("<u>Underline</u>")).to eq("Underline")
end
# <script> elements are expected to be removed together with their content.
# NOTE(review): this example was left syntactically broken — the element and
# its body were lost entirely. The tag name and script body below are
# assumptions; confirm against the upstream spec.
it "removes <script>" do
  expect(html_to_markdown("<script>var foo = 'bar'</script>")).to eq("")
end
# <style> elements are expected to be removed together with their content.
# NOTE(review): this example was left syntactically broken — the element and
# its body were lost entirely. The tag name and stylesheet below are
# assumptions; confirm against the upstream spec.
it "removes <style>" do
  expect(html_to_markdown("<style>* { margin: 0 }</style>")).to eq("")
end
# Block elements nested in an inline wrapper are still expected to produce
# their usual line breaks (<div>: newline, <p>: blank line).
# NOTE(review): the tags were stripped from this example; the wrapper element
# and exact nesting are assumptions — confirm against the upstream spec.
it "handles <p> and <div> within <span>" do
  html = "<div>1st paragraph<span><div>2nd paragraph</div><p>3rd paragraph</p></span></div>"

  expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph\n\n3rd paragraph")
end
# Line breaks and block elements nested in an inline wrapper are still
# expected to produce their usual separators.
# NOTE(review): the tags were stripped from this example; the wrapper element
# (<font>) and exact nesting are assumptions — confirm against upstream.
it "handles <p> and <div> within <font>" do
  html = "<font>1st paragraph<br><span>2nd paragraph</span><div>3rd paragraph</div><p>4th paragraph</p></font>"

  expect(html_to_markdown(html)).to eq("1st paragraph\n2nd paragraph\n3rd paragraph\n\n4th paragraph")
end
# A <br> at the edge of (or inside) an emphasis element is expected to end up
# outside the emphasis markers, splitting the emphasis when it sits mid-text.
# NOTE(review): the tags were stripped from these examples. The element names
# and the position of each <br> are inferred from the expected Markdown —
# confirm against the upstream spec.
context "with an oddly placed <br>" do
  it "handles <strong>" do
    expect(html_to_markdown("Hello <strong><br>Bold</strong> World")).to eq("Hello\n**Bold** World")
    expect(html_to_markdown("Hello <strong>Bold<br></strong> World")).to eq("Hello **Bold**\nWorld")
    expect(html_to_markdown("Hello <strong>Bold<br>text</strong> World")).to eq("Hello **Bold**\n**text** World")
  end

  it "handles <em>" do
    expect(html_to_markdown("Hello <em><br>Italic</em> World")).to eq("Hello\n*Italic* World")
    expect(html_to_markdown("Hello <em>Italic<br></em> World")).to eq("Hello *Italic*\nWorld")
    expect(html_to_markdown("Hello <em>Italic<br>text</em> World")).to eq("Hello *Italic*\n*text* World")
  end

  it "works" do
    # Bold spanning two <br>s with italic nested across the first one.
    html = "A <b>B <i>C<br>D</i> E<br>F</b> G"

    expect(html_to_markdown(html)).to eq("A __B *C*__\n__*D* E__\n**F** G")
  end
end
# Emphasis elements that are empty or whitespace-only are expected to produce
# no markers; between words they collapse to a single space.
# NOTE(review): the tags were stripped from these examples and whitespace runs
# were collapsed (leaving duplicate lines). The single- vs. multi-space
# variants below are reconstructed — confirm against the upstream spec.
context "with an empty tag" do
  it "handles <strong>" do
    expect(html_to_markdown("<strong></strong>")).to eq("")
    expect(html_to_markdown("<strong> </strong>")).to eq("")
    expect(html_to_markdown("Some<strong> </strong>text")).to eq("Some text")
    expect(html_to_markdown("Some<strong>    </strong>text")).to eq("Some text")
  end

  it "handles <em>" do
    expect(html_to_markdown("<em></em>")).to eq("")
    expect(html_to_markdown("<em> </em>")).to eq("")
    expect(html_to_markdown("Some<em> </em>text")).to eq("Some text")
    expect(html_to_markdown("Some<em>    </em>text")).to eq("Some text")
  end
end
# Leading/trailing whitespace inside an emphasis element is expected to move
# outside the markers (and be trimmed at the edges of the output).
# NOTE(review): the tags were stripped from these examples and whitespace runs
# were collapsed (leaving duplicate lines). The single- vs. multi-space
# variants below are reconstructed — confirm against the upstream spec.
context "with spaces around text" do
  it "handles <strong>" do
    expect(html_to_markdown("<strong> Bold</strong>")).to eq("**Bold**")
    expect(html_to_markdown("<strong>    Bold</strong>")).to eq("**Bold**")
    expect(html_to_markdown("<strong>Bold </strong>")).to eq("**Bold**")
    expect(html_to_markdown("<strong>Bold    </strong>")).to eq("**Bold**")
    expect(html_to_markdown("Some<strong> bold</strong> text")).to eq("Some **bold** text")
    expect(html_to_markdown("Some<strong>    bold</strong> text")).to eq("Some **bold** text")
    expect(html_to_markdown("Some <strong>bold </strong>text")).to eq("Some **bold** text")
    expect(html_to_markdown("Some <strong>bold    </strong>text")).to eq("Some **bold** text")
  end

  it "handles <em>" do
    expect(html_to_markdown("<em> Italic</em>")).to eq("*Italic*")
    expect(html_to_markdown("<em>    Italic</em>")).to eq("*Italic*")
    expect(html_to_markdown("<em>Italic </em>")).to eq("*Italic*")
    expect(html_to_markdown("<em>Italic    </em>")).to eq("*Italic*")
    expect(html_to_markdown("Some<em> italic</em> text")).to eq("Some *italic* text")
    expect(html_to_markdown("Some<em>    italic</em> text")).to eq("Some *italic* text")
    expect(html_to_markdown("Some <em>italic </em>text")).to eq("Some *italic* text")
    expect(html_to_markdown("Some <em>italic    </em>text")).to eq("Some *italic* text")
  end
end
# A well-formed <table> is expected to become a pipe table with a "| - |"
# separator row after the first row; inline formatting inside cells is
# converted too.
# NOTE(review): the table markup was stripped from this example and has been
# reconstructed. The inline elements (<i>, <b>, <sup>) are inferred from the
# expected Markdown, and <sup> is assumed to pass through in the "2nd" cell —
# confirm against the upstream spec. Also fixes the "supoorts" typo in the
# example name.
it "supports <table>" do
  html = <<~HTML
    <table>
      <thead>
        <tr>
          <th>This</th>
          <th>is</th>
          <th>the</th>
          <th><i>headers</i></th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td>I am</td>
          <td>the</td>
          <td><b>first</b></td>
          <td>row</td>
        </tr>
        <tr>
          <td>And this</td>
          <td>is the</td>
          <td>2<sup>nd</sup></td>
          <td>line</td>
        </tr>
      </tbody>
    </table>
  HTML

  markdown = <<~MD
    | This | is | the | *headers* |
    | - | - | - | - |
    | I am | the | **first** | row |
    | And this | is the | 2<sup>nd</sup> | line |
  MD

  expect(html_to_markdown(html)).to eq(markdown.strip)

  # A table without a header section still gets a separator after its first row.
  headerless = "<table><tr><td>Hello</td><td>World</td></tr></table>"
  expect(html_to_markdown(headerless)).to eq("| Hello | World |\n| - | - |")
end
# A table whose rows have differing cell counts is expected to fall back to
# plain text rather than being dropped.
# NOTE(review): the table markup was stripped from this example and has been
# reconstructed; the expected string is kept exactly as found (including the
# space before the newline) — confirm both against the upstream spec.
it "doesn't swallow badly formatted <table>" do
  html = <<~HTML
    <table>
      <tr>
        <th>1</th>
        <th>2</th>
        <th>3</th>
        <th>4</th>
      </tr>
      <tr>
        <td>One</td>
        <td>Two</td>
        <td>Three</td>
      </tr>
    </table>
  HTML

  expect(html_to_markdown(html)).to eq("1 2 3 4 \nOne Two Three")
end
end