diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index 226b8a2843f..889c6c1ac0e 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -54,7 +54,7 @@ class SearchIndexer loop do count += 1 break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots - term, _, remaining = lexeme.partition(".") + _term, _, remaining = lexeme.partition(".") break if remaining.blank? array << "'#{remaining}':#{positions}" lexeme = remaining @@ -69,8 +69,8 @@ class SearchIndexer if (max_dupes = SiteSetting.max_duplicate_search_index_terms) > 0 reduced = [] tsvector - .scan(/([^\:]+\:)(([0-9]+[A-D]?,?)+)/) - .each do |term, indexes| + .scan(/('([^']*|'')*'\:)(([0-9]+[A-D]?,?)+)/) + .each do |term, _, indexes| family_counts = Hash.new(0) new_index_array = [] diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index 1d892607f14..fa43789e9fb 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -307,15 +307,22 @@ RSpec.describe SearchIndexer do it "limits number of repeated terms when max_duplicate_search_index_terms site setting has been configured" do SiteSetting.max_duplicate_search_index_terms = 5 - contents = "I am #{"sam " * 10}" + contents = <<~TEXT + #{"sam " * 10} + url + TEXT + post.update!(raw: contents) post_search_data = post.post_search_data post_search_data.reload - expect(post_search_data.search_data).to eq( - "'sam':12,13,14,15,16 'test':8A 'titl':4A 'uncategor':9B", - ) + terms = + "'/path:path''path':22 'com':21 'sam':10,11,12,13,14 'something.com':21 'something.com/path:path''path':20 'test':8A 'titl':4A 'uncategor':9B 'url':23".split( + " ", + ).sort + + expect(post_search_data.search_data.split(" ").sort).to contain_exactly(*terms) end end