FIX: Make HTML scrubber work with deep HTML (#12619)
SearchIndexer and ReindexSearch used to raise an error for posts with very deeply nested or invalid HTML content.
This commit is contained in:
parent
cdd5b60447
commit
c10df4b58d
|
@ -284,7 +284,11 @@ class SearchIndexer
|
|||
def self.scrub(html, strip_diacritics: false)
|
||||
return +"" if html.blank?
|
||||
|
||||
document = Nokogiri::HTML5("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
|
||||
begin
|
||||
document = Nokogiri::HTML5("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
|
||||
rescue ArgumentError
|
||||
return +""
|
||||
end
|
||||
|
||||
nodes = document.css(
|
||||
"div.#{CookedPostProcessor::LIGHTBOX_WRAPPER_CSS_CLASS}"
|
||||
|
|
|
@ -144,6 +144,19 @@ describe SearchIndexer do
|
|||
.to change { post.reload.post_search_data.search_data }
|
||||
end
|
||||
|
||||
# Regression example: sets the post's cooked HTML to the unclosed "<FD>" tag
# repeated Nokogumbo::DEFAULT_MAX_TREE_DEPTH times, then indexes the post.
# NOTE(review): presumably this nests elements up to/past the parser's depth
# limit, which is what used to make indexing fail — confirm against Nokogumbo.
it 'should work with invalid HTML' do
|
||||
post.update!(cooked: "<FD>" * Nokogumbo::DEFAULT_MAX_TREE_DEPTH)
|
||||
|
||||
# No explicit expectation follows: the example passes as long as this call
# completes without raising.
SearchIndexer.update_posts_index(
|
||||
post_id: post.id,
|
||||
topic_title: post.topic.title,
|
||||
category_name: post.topic.category&.name,
|
||||
topic_tags: post.topic.tags.map(&:name).join(' '),
|
||||
cooked: post.cooked,
|
||||
private_message: post.topic.private_message?
|
||||
)
|
||||
end
|
||||
|
||||
it 'should not index posts with empty raw' do
|
||||
expect do
|
||||
post = Fabricate.build(:post, raw: "", post_type: Post.types[:small_action])
|
||||
|
|
Loading…
Reference in New Issue