diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb
index d302f6f2380..3052cedd318 100644
--- a/lib/html_to_markdown.rb
+++ b/lib/html_to_markdown.rb
@@ -26,7 +26,7 @@ class HtmlToMarkdown
end
def remove_not_allowed!(doc)
- allowed = Set.new
+ allowed = Set.new(@opts[:additional_allowed_tags] || [])
HtmlToMarkdown.private_instance_methods.each do |m|
if tag = m.to_s[/^visit_(.+)/, 1]
diff --git a/spec/lib/html_to_markdown_spec.rb b/spec/lib/html_to_markdown_spec.rb
index 34b2cd30869..821d2ad1f15 100644
--- a/spec/lib/html_to_markdown_spec.rb
+++ b/spec/lib/html_to_markdown_spec.rb
@@ -65,6 +65,43 @@ RSpec.describe HtmlToMarkdown do
expect(html_to_markdown(html)).to eq(markdown.strip)
end
+ it "removes tags that aren't allowed" do # checks that content of a tag outside the converter's allowed set is dropped
+ html = <<~HTML
+ Text withing custom tag
+
Text within allowed tag
+ HTML
+
+ expect(html_to_markdown(html)).to eq("Text within allowed tag") # only the second fixture line's text may survive; NOTE(review): fixture markup is not visible in this view - confirm which tags wrap each line
+ end
+
+ it "allows additional tags that can be consumed by subclasses" do
+ class ExtendedHtmlToMarkdown < HtmlToMarkdown
+ def to_markdown
+ yield @doc
+ super
+ end
+ end
+
+ html = <<~HTML
+ Image text
+ Text within allowed tag
+ HTML
+
+ md =
+ ExtendedHtmlToMarkdown
+ .new(html)
+ .to_markdown { |doc| expect(doc.css("custom-image")).to be_empty }
+ expect(md).to eq("Text within allowed tag")
+
+ md =
+ ExtendedHtmlToMarkdown
+ .new(html, { additional_allowed_tags: ["custom-image"] })
+ .to_markdown do |doc|
+ doc.css("custom-image").each { |img| img.replace("Image #{img["image-id"]}") }
+ end
+ expect(md).to eq("Image 42\nText within allowed tag")
+ end
+
it "doesn't error on non-inline elements like (aside, section)" do
html = <<~HTML