diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 40104c5b13c..530ac26c6d0 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -87,7 +87,17 @@ class SearchIndexer
           .scan(TS_VECTOR_PARSE_REGEX)
           .map do |term, _, indexes|
             new_indexes =
-              indexes.split(",").map { |index| additional_words[index.to_i - 1][1] }.join(",")
+              indexes
+                .split(",")
+                .map do |index|
+                  existing_positions = additional_words[index.to_i - 1]
+                  if existing_positions
+                    existing_positions[1]
+                  else
+                    index
+                  end
+                end
+                .join(",")
             "#{term}#{new_indexes}"
           end
           .join(" ")
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 13890c884c0..b8c1cc8b9eb 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -139,6 +139,24 @@ RSpec.describe SearchIndexer do
       }
     end
 
+    it "should work with edge case domain names" do
+      # "00E5A4" is stemmed into two lexemes, "00e5" and "a4"; odd, but by design.
+      # That split can make the lexeme positions stop lining up with the words
+      # stuffed in for domain names, which would make internal indexing fail.
+      post.update!(cooked: <<~HTML)
+        Test.00E5A4.1
+      HTML
+
+      SearchIndexer.update_posts_index(
+        post_id: post.id,
+        topic_title: post.topic.title,
+        category_name: post.topic.category&.name,
+        topic_tags: post.topic.tags.map(&:name).join(" "),
+        cooked: post.cooked,
+        private_message: post.topic.private_message?,
+      )
+    end
+
     it "should work with invalid HTML" do
       post.update!(cooked: "" * Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH)
 