FEATURE: prioritize_exact_search_title_match hidden setting (#20089)

The new `prioritize_exact_search_title_match` setting can be used to force the
search algorithm to prioritize exact term matches in titles when ranking results.

This is scoped narrowly to titles for cases such as a topic titled:

"organisation chart" and a search of "org chart".

If we scoped this wider, all discussion about "org chart" would float to
the top and leave a very common title de-prioritized.

This is a hidden site setting and it has some performance impact due
to double ranking.

That said, the performance impact is somewhat mitigated because ranking on
title alone is a very cheap operation.
This commit is contained in:
Sam 2023-01-31 16:34:01 +11:00 committed by GitHub
parent f31f0b70f8
commit c5345d0e54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 17 deletions

View File

@ -2194,6 +2194,9 @@ backups:
client: true client: true
search: search:
prioritize_exact_search_title_match:
default: false
hidden: true
max_duplicate_search_index_terms: max_duplicate_search_index_terms:
default: -1 default: -1
hidden: true hidden: true

View File

@ -1132,14 +1132,13 @@ class Search
posts = posts.order("posts.like_count DESC") posts = posts.order("posts.like_count DESC")
end end
elsif !is_topic_search elsif !is_topic_search
rank = <<~SQL exact_rank = nil
TS_RANK_CD(
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""} if SiteSetting.prioritize_exact_search_title_match
post_search_data.search_data, exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
#{@term.blank? ? "" : ts_query(weight_filter: weights)}, end
#{SiteSetting.search_ranking_normalization}|32
) rank = ts_rank_cd(weight_filter: weights)
SQL
if type_filter != "private_messages" if type_filter != "private_messages"
category_search_priority = <<~SQL category_search_priority = <<~SQL
@ -1170,6 +1169,22 @@ class Search
) )
SQL SQL
posts =
if aggregate_search
posts.order("MAX(#{category_search_priority}) DESC")
else
posts.order("#{category_search_priority} DESC")
end
if @term.present? && exact_rank
posts =
if aggregate_search
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
else
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
end
end
data_ranking = data_ranking =
if @term.blank? if @term.blank?
"(#{category_priority_weights})" "(#{category_priority_weights})"
@ -1179,9 +1194,9 @@ class Search
posts = posts =
if aggregate_search if aggregate_search
posts.order("MAX(#{category_search_priority}) DESC", "MAX(#{data_ranking}) DESC") posts.order("MAX(#{data_ranking}) DESC")
else else
posts.order("#{category_search_priority} DESC", "#{data_ranking} DESC") posts.order("#{data_ranking} DESC")
end end
end end
@ -1211,6 +1226,17 @@ class Search
posts.limit(limit) posts.limit(limit)
end end
# Builds the SQL fragment for a PostgreSQL TS_RANK_CD(...) call that ranks
# post_search_data.search_data against the current search term.
#
# weight_filter: - passed through to ts_query to restrict which tsvector
#                  weight labels the query matches (the caller passes "A"
#                  for the exact title rank).
# prefix_match:  - passed through to ts_query; defaults to true.
#
# If SiteSetting.search_ranking_weights is present it is emitted as the
# quoted first argument; otherwise that argument is omitted. The
# normalization argument is SiteSetting.search_ranking_normalization OR-ed
# with 32.
#
# Returns the fragment as a String (heredoc), not an executed query.
#
# NOTE(review): when @term is blank the query argument is interpolated as an
# empty string, leaving a bare comma in the fragment. Callers appear to use
# the result only when a term is present; confirm.
def ts_rank_cd(weight_filter:, prefix_match: true)
<<~SQL
TS_RANK_CD(
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""}
post_search_data.search_data,
#{@term.blank? ? "" : ts_query(weight_filter: weight_filter, prefix_match: prefix_match)},
#{SiteSetting.search_ranking_normalization}|32
)
SQL
end
def categories_ignored(posts) def categories_ignored(posts)
posts.where(<<~SQL, Searchable::PRIORITIES[:ignore]) posts.where(<<~SQL, Searchable::PRIORITIES[:ignore])
(categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?) (categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
@ -1225,8 +1251,11 @@ class Search
self.class.default_ts_config self.class.default_ts_config
end end
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil) def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil, prefix_match: true)
to_tsquery(ts_config: ts_config, term: set_tsquery_weight_filter(term, weight_filter)) to_tsquery(
ts_config: ts_config,
term: set_tsquery_weight_filter(term, weight_filter, prefix_match: prefix_match),
)
end end
def self.to_tsquery(ts_config: nil, term:, joiner: nil) def self.to_tsquery(ts_config: nil, term:, joiner: nil)
@ -1237,8 +1266,8 @@ class Search
tsquery tsquery
end end
def self.set_tsquery_weight_filter(term, weight_filter) def self.set_tsquery_weight_filter(term, weight_filter, prefix_match: true)
"'#{self.escape_string(term)}':*#{weight_filter}" "'#{self.escape_string(term)}':#{prefix_match ? "*" : ""}#{weight_filter}"
end end
def self.escape_string(term) def self.escape_string(term)
@ -1251,11 +1280,16 @@ class Search
PG::Connection.escape_string(term).gsub('\\', '\\\\\\') PG::Connection.escape_string(term).gsub('\\', '\\\\\\')
end end
def ts_query(ts_config = nil, weight_filter: nil) def ts_query(ts_config = nil, weight_filter: nil, prefix_match: true)
@ts_query_cache ||= {} @ts_query_cache ||= {}
@ts_query_cache[ @ts_query_cache[
"#{ts_config || default_ts_config} #{@term} #{weight_filter}" "#{ts_config || default_ts_config} #{@term} #{weight_filter} #{prefix_match}"
] ||= Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter) ] ||= Search.ts_query(
term: @term,
ts_config: ts_config,
weight_filter: weight_filter,
prefix_match: prefix_match,
)
end end
def wrap_rows(query) def wrap_rows(query)

View File

@ -2603,6 +2603,35 @@ RSpec.describe Search do
end end
end end
# Exercises the hidden prioritize_exact_search_title_match site setting:
# three topics are indexed and searched for "sam", and the expected result
# order is asserted.
context "when prioritize_exact_search_match is enabled" do
before { SearchIndexer.enable }
after { SearchIndexer.disable }
it "correctly ranks topics" do
SiteSetting.prioritize_exact_search_title_match = true
# topic1: title repeats "saml" (not the exact word "sam"); body mentions "saml"
topic1 = Fabricate(:topic, title: "saml saml saml is the best")
post1 = Fabricate(:post, topic: topic1, raw: "this topic is a story about saml")
# topic2: title contains the exact word "sam"; body only mentions "saml"
topic2 = Fabricate(:topic, title: "sam has ideas about lots of things")
post2 = Fabricate(:post, topic: topic2, raw: "this topic is not about saml saml saml")
# topic3: title has no match; body repeats "sam" several times
topic3 = Fabricate(:topic, title: "jane has ideas about lots of things")
post3 = Fabricate(:post, topic: topic3, raw: "sam sam sam sam lets add sams")
SearchIndexer.index(post1, force: true)
SearchIndexer.index(post2, force: true)
SearchIndexer.index(post3, force: true)
result = Search.execute("sam")
expect(result.posts.length).to eq(3)
# topic2 is the only title containing the exact word "sam", so post2 is
# expected first, followed by post1 and then post3
expect(result.posts.pluck(:id)).to eq([post2.id, post1.id, post3.id])
end
end
context "when max_duplicate_search_index_terms limits duplication" do context "when max_duplicate_search_index_terms limits duplication" do
before { SearchIndexer.enable } before { SearchIndexer.enable }