FIX: Limit PG headline-based search blurb generation to 200 characters.
* Restores the omission characters '...' in the blurb as well.
This commit is contained in:
parent
ec173a72d9
commit
93f8396b4b
|
@ -1177,8 +1177,28 @@ class Search
|
|||
.joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
|
||||
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
||||
.select(
|
||||
"TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
|
||||
"TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
"TS_HEADLINE(
|
||||
#{ts_config},
|
||||
t1.fancy_title,
|
||||
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||
'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
|
||||
) AS topic_title_headline",
|
||||
"TS_HEADLINE(
|
||||
#{ts_config},
|
||||
LEFT(
|
||||
TS_HEADLINE(
|
||||
#{ts_config},
|
||||
LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}),
|
||||
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||
'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel='''''
|
||||
),
|
||||
#{Search::GroupedSearchResults::BLURB_LENGTH}
|
||||
),
|
||||
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||
'HighlightAll=true, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
|
||||
) AS headline",
|
||||
"LEFT(pd.raw_data, 50) AS leading_raw_data",
|
||||
"RIGHT(pd.raw_data, 50) AS trailing_raw_data",
|
||||
default_scope.arel.projections
|
||||
)
|
||||
else
|
||||
|
|
|
@ -78,6 +78,9 @@ class Search
|
|||
end
|
||||
end
|
||||
|
||||
OMISSION = '...'
|
||||
SCRUB_HEADLINE_REGEXP = /<span(?: \w+="[^"]+")* class="#{Search::HIGHLIGHT_CSS_CLASS}"(?: \w+="[^"]+")*>([^<]*)<\/span>/
|
||||
|
||||
def blurb(post)
|
||||
opts = {
|
||||
term: @blurb_term,
|
||||
|
@ -86,7 +89,10 @@ class Search
|
|||
|
||||
if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
|
||||
if SiteSetting.use_pg_headlines_for_excerpt
|
||||
return post.headline
|
||||
scrubbed_headline = post.headline.gsub(SCRUB_HEADLINE_REGEXP, '\1')
|
||||
prefix_omission = scrubbed_headline.start_with?(post.leading_raw_data) ? '' : OMISSION
|
||||
postfix_omission = scrubbed_headline.end_with?(post.trailing_raw_data) ? '' : OMISSION
|
||||
return "#{prefix_omission}#{post.headline}#{postfix_omission}"
|
||||
else
|
||||
opts[:cooked] = post.post_search_data.raw_data
|
||||
opts[:scrub] = false
|
||||
|
|
|
@ -410,7 +410,7 @@ describe Search do
|
|||
end
|
||||
|
||||
let(:expected_blurb) do
|
||||
"hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"search-highlight\">elephant</span>"
|
||||
"#{Search::GroupedSearchResults::OMISSION}hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">elephant</span>"
|
||||
end
|
||||
|
||||
it 'returns the post' do
|
||||
|
@ -429,7 +429,7 @@ describe Search do
|
|||
expect(post.topic_title_headline).to eq(topic.fancy_title)
|
||||
end
|
||||
|
||||
it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
|
||||
it "only applies highlighting to the first #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
|
||||
|
@ -443,6 +443,20 @@ describe Search do
|
|||
expect(post.headline.include?('elephant')).to eq(false)
|
||||
end
|
||||
|
||||
it "limits the search headline to #{Search::GroupedSearchResults::BLURB_LENGTH} characters" do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
reply.update!(raw: "#{'a' * Search::GroupedSearchResults::BLURB_LENGTH} elephant")
|
||||
|
||||
result = Search.execute('elephant')
|
||||
|
||||
expect(result.posts.map(&:id)).to contain_exactly(reply.id)
|
||||
|
||||
post = result.posts.first
|
||||
|
||||
expect(result.blurb(post)).to eq("#{'a' * Search::GroupedSearchResults::BLURB_LENGTH}#{Search::GroupedSearchResults::OMISSION}")
|
||||
end
|
||||
|
||||
it 'returns the right post and blurb for searches with phrase' do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
|
|
Loading…
Reference in New Issue