diff --git a/lib/search.rb b/lib/search.rb index 16b813d8587..c2b4a13b52f 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1177,8 +1177,28 @@ class Search .joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id") .joins("INNER JOIN topics t1 ON t1.id = posts.topic_id") .select( - "TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel='''', StopSel=''''') AS topic_title_headline", - "TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel=''''') AS headline", + "TS_HEADLINE( + #{ts_config}, + t1.fancy_title, + PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), + 'StartSel='''', StopSel=''''' + ) AS topic_title_headline", + "TS_HEADLINE( + #{ts_config}, + LEFT( + TS_HEADLINE( + #{ts_config}, + LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), + PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), + 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel=''''' + ), + #{Search::GroupedSearchResults::BLURB_LENGTH} + ), + PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), + 'HighlightAll=true, StartSel='''', StopSel=''''' + ) AS headline", + "LEFT(pd.raw_data, 50) AS leading_raw_data", + "RIGHT(pd.raw_data, 50) AS trailing_raw_data", default_scope.arel.projections ) else diff --git a/lib/search/grouped_search_results.rb b/lib/search/grouped_search_results.rb index cac0c52897f..470a732e7d8 100644 --- a/lib/search/grouped_search_results.rb +++ b/lib/search/grouped_search_results.rb @@ -78,6 +78,9 @@ class Search end end + OMISSION = '...' + SCRUB_HEADLINE_REGEXP = /([^<]*)<\/span>/ + def blurb(post) opts = { term: @blurb_term, @@ -86,7 +89,10 @@ class Search if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION if SiteSetting.use_pg_headlines_for_excerpt - return post.headline + scrubbed_headline = post.headline.gsub(SCRUB_HEADLINE_REGEXP, '\1') + prefix_omission = scrubbed_headline.start_with?(post.leading_raw_data) ? '' : OMISSION + postfix_omission = scrubbed_headline.end_with?(post.trailing_raw_data) ? '' : OMISSION + return "#{prefix_omission}#{post.headline}#{postfix_omission}" else opts[:cooked] = post.post_search_data.raw_data opts[:scrub] = false diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 2588bc27e09..3afe2c8d85a 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -410,7 +410,7 @@ describe Search do end let(:expected_blurb) do - "hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. elephant" + "#{Search::GroupedSearchResults::OMISSION}hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. elephant" end it 'returns the post' do @@ -429,7 +429,7 @@ describe Search do expect(post.topic_title_headline).to eq(topic.fancy_title) end - it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do + it "only applies highlighting to the first #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do SiteSetting.use_pg_headlines_for_excerpt = true reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}") @@ -443,6 +443,20 @@ describe Search do expect(post.headline.include?('elephant')).to eq(false) end + it "limits the search headline to #{Search::GroupedSearchResults::BLURB_LENGTH} characters" do + SiteSetting.use_pg_headlines_for_excerpt = true + + reply.update!(raw: "#{'a' * Search::GroupedSearchResults::BLURB_LENGTH} elephant") + + result = Search.execute('elephant') + + expect(result.posts.map(&:id)).to contain_exactly(reply.id) + + post = result.posts.first + + expect(result.blurb(post)).to eq("#{'a' * Search::GroupedSearchResults::BLURB_LENGTH}#{Search::GroupedSearchResults::OMISSION}") + end + it 'returns the right post and blurb for searches with phrase' do SiteSetting.use_pg_headlines_for_excerpt = true