FIX: `HtmlToMarkdown` should keep the HTML entities for `<`, `>` and `&` within elements that are kept as HTML
Not all HTML elements are converted into Markdown; some are kept as HTML. Without this fix, XML/HTML entities that are formatted as text instead of code are swallowed by Discourse. This also fixes quotes in the `title` attribute of the `<abbr>` tag.
This commit is contained in:
parent
3c9d61d302
commit
7bdf47b864
|
@ -197,7 +197,9 @@ class HtmlToMarkdown
|
|||
|
||||
ALLOWED ||= %w[kbd del ins small big sub sup dl dd dt mark]
|
||||
ALLOWED.each do |tag|
|
||||
define_method("visit_#{tag}") { |node| "<#{tag}>#{traverse(node)}</#{tag}>" }
|
||||
define_method("visit_#{tag}") do |node|
|
||||
"<#{tag}>#{traverse(node, within_html_block: true)}</#{tag}>"
|
||||
end
|
||||
end
|
||||
|
||||
def visit_blockquote(node)
|
||||
|
@ -250,8 +252,8 @@ class HtmlToMarkdown
|
|||
|
||||
def visit_abbr(node)
|
||||
title = node["title"].presence
|
||||
title_attr = title ? %[ title="#{title}"] : ""
|
||||
"<abbr#{title_attr}>#{traverse(node)}</abbr>"
|
||||
attributes = { title: } if title
|
||||
create_element("abbr", traverse(node, within_html_block: true), attributes).to_html
|
||||
end
|
||||
|
||||
def visit_acronym(node)
|
||||
|
|
|
@ -259,6 +259,35 @@ RSpec.describe HtmlToMarkdown do
|
|||
expect(html_to_markdown("<code>Code</code>")).to eq("`Code`")
|
||||
end
|
||||
|
||||
describe "when HTML is used within Markdown" do
|
||||
HtmlToMarkdown::ALLOWED.each do |tag|
|
||||
it "keeps mandatory HTML entities in text of <#{tag}>" do
|
||||
expect(html_to_markdown("<#{tag}>Less than: <</#{tag}>")).to eq(
|
||||
"<#{tag}>Less than: <</#{tag}>",
|
||||
)
|
||||
expect(html_to_markdown("<#{tag}>Greater than: >")).to eq(
|
||||
"<#{tag}>Greater than: ></#{tag}>",
|
||||
)
|
||||
expect(html_to_markdown("<#{tag}>Ampersand: &")).to eq(
|
||||
"<#{tag}>Ampersand: &</#{tag}>",
|
||||
)
|
||||
|
||||
expect(html_to_markdown("<#{tag}>Double Quote: "</#{tag}>")).to eq(
|
||||
"<#{tag}>Double Quote: \"</#{tag}>",
|
||||
)
|
||||
expect(html_to_markdown("<#{tag}>Single Quote: '</#{tag}>")).to eq(
|
||||
"<#{tag}>Single Quote: '</#{tag}>",
|
||||
)
|
||||
expect(html_to_markdown("<#{tag}>Copyright Symbol: ©</#{tag}>")).to eq(
|
||||
"<#{tag}>Copyright Symbol: ©</#{tag}>",
|
||||
)
|
||||
expect(html_to_markdown("<#{tag}>Euro Symbol: €</#{tag}>")).to eq(
|
||||
"<#{tag}>Euro Symbol: €</#{tag}>",
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it "supports <ins>" do
|
||||
expect(html_to_markdown("This is an <ins>insertion</ins>")).to eq(
|
||||
"This is an <ins>insertion</ins>",
|
||||
|
@ -285,16 +314,37 @@ RSpec.describe HtmlToMarkdown do
|
|||
|
||||
it "supports <small>" do
|
||||
expect(html_to_markdown("<small>Small</small>")).to eq("<small>Small</small>")
|
||||
expect(html_to_markdown("<mark><small>Small</small></mark>")).to eq(
|
||||
"<mark><small>Small</small></mark>",
|
||||
)
|
||||
expect(html_to_markdown("<strong><small>Small</small></strong>")).to eq(
|
||||
"**<small>Small</small>**",
|
||||
)
|
||||
expect(html_to_markdown("<small><strong><small></strong></small>")).to eq(
|
||||
"<small>**<small>**</small>",
|
||||
)
|
||||
end
|
||||
|
||||
it "supports <big>" do
|
||||
expect(html_to_markdown("<big>Big</big>")).to eq("<big>Big</big>")
|
||||
expect(html_to_markdown("<big><big></big>")).to eq("<big><big></big>")
|
||||
end
|
||||
|
||||
it "supports <kbd>" do
|
||||
expect(html_to_markdown("<kbd>CTRL</kbd>+<kbd>C</kbd>")).to eq("<kbd>CTRL</kbd>+<kbd>C</kbd>")
|
||||
expect(html_to_markdown("<kbd><</kbd>")).to eq("<kbd><</kbd>")
|
||||
end
|
||||
|
||||
it "supports <abbr>" do
|
||||
expect(
|
||||
html_to_markdown(%Q{<abbr title="Civilized Discourse Construction Kit, Inc.">CDCK</abbr>}),
|
||||
).to eq(%Q{<abbr title="Civilized Discourse Construction Kit, Inc.">CDCK</abbr>})
|
||||
|
||||
expect(
|
||||
html_to_markdown(
|
||||
%Q{<abbr title=""abbr": The Abbreviation element"><abbr></abbr>},
|
||||
),
|
||||
).to eq(%Q{<abbr title=""abbr": The Abbreviation element"><abbr></abbr>})
|
||||
end
|
||||
|
||||
it "supports <s>" do
|
||||
|
@ -366,6 +416,18 @@ RSpec.describe HtmlToMarkdown do
|
|||
"<pre> function f() {\n console.log('Hello world!');\n }</pre>",
|
||||
),
|
||||
).to eq("```\n function f() {\n console.log('Hello world!');\n }\n```")
|
||||
|
||||
html = <<~HTML
|
||||
<pre data-code-wrap="plaintext"><code class="lang-plaintext">Reported-and-tested-by: A <a@example.com>
|
||||
Reviewed-by: B <b@example.com></code></pre>
|
||||
HTML
|
||||
md = <<~MD
|
||||
```plaintext
|
||||
Reported-and-tested-by: A <a@example.com>
|
||||
Reviewed-by: B <b@example.com>
|
||||
```
|
||||
MD
|
||||
expect(html_to_markdown(html)).to eq(md.strip)
|
||||
end
|
||||
|
||||
it "supports <pre> inside <blockquote>" do
|
||||
|
|
Loading…
Reference in New Issue