diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 12bdcbc4aaf..45d423925db 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -284,7 +284,11 @@ class SearchIndexer
def self.scrub(html, strip_diacritics: false)
return +"" if html.blank?
- document = Nokogiri::HTML5("
#{html}
", nil, Encoding::UTF_8.to_s)
+ begin
+ document = Nokogiri::HTML5("#{html}
", nil, Encoding::UTF_8.to_s)
+ rescue ArgumentError
+ return +""
+ end
nodes = document.css(
"div.#{CookedPostProcessor::LIGHTBOX_WRAPPER_CSS_CLASS}"
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 936dca05377..659ef8cd4f6 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -144,6 +144,19 @@ describe SearchIndexer do
.to change { post.reload.post_search_data.search_data }
end
+ it 'should work with invalid HTML' do
+   # Nest real tags up to the parser's depth limit; an empty string would return early on html.blank?.
+   post.update!(cooked: "<div>" * Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
+   SearchIndexer.update_posts_index(
+     post_id: post.id,
+     topic_title: post.topic.title,
+     category_name: post.topic.category&.name,
+     topic_tags: post.topic.tags.map(&:name).join(' '),
+     cooked: post.cooked,
+     private_message: post.topic.private_message?
+   )
+ end
+
it 'should not index posts with empty raw' do
expect do
post = Fabricate.build(:post, raw: "", post_type: Post.types[:small_action])