FEATURE: prioritize_exact_search_title_match hidden setting (#20089)
The new `prioritize_exact_search_title_match` setting can be used to force the search algorithm to prioritize exact term matches in the title when ranking results. This is scoped narrowly to titles for cases such as a topic titled "organisation chart" and a search for "org chart". If we scoped this wider, all discussion about "org chart" would float to the top and leave a very common title de-prioritized. This is a hidden site setting, and it has some performance impact due to double ranking. That said, the performance impact is somewhat mitigated because ranking on title alone is a very cheap operation.
This commit is contained in:
parent
f31f0b70f8
commit
c5345d0e54
|
@ -2194,6 +2194,9 @@ backups:
|
|||
client: true
|
||||
|
||||
search:
|
||||
prioritize_exact_search_title_match:
|
||||
default: false
|
||||
hidden: true
|
||||
max_duplicate_search_index_terms:
|
||||
default: -1
|
||||
hidden: true
|
||||
|
|
|
@ -1132,14 +1132,13 @@ class Search
|
|||
posts = posts.order("posts.like_count DESC")
|
||||
end
|
||||
elsif !is_topic_search
|
||||
rank = <<~SQL
|
||||
TS_RANK_CD(
|
||||
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""}
|
||||
post_search_data.search_data,
|
||||
#{@term.blank? ? "" : ts_query(weight_filter: weights)},
|
||||
#{SiteSetting.search_ranking_normalization}|32
|
||||
)
|
||||
SQL
|
||||
exact_rank = nil
|
||||
|
||||
if SiteSetting.prioritize_exact_search_title_match
|
||||
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
|
||||
end
|
||||
|
||||
rank = ts_rank_cd(weight_filter: weights)
|
||||
|
||||
if type_filter != "private_messages"
|
||||
category_search_priority = <<~SQL
|
||||
|
@ -1170,6 +1169,22 @@ class Search
|
|||
)
|
||||
SQL
|
||||
|
||||
posts =
|
||||
if aggregate_search
|
||||
posts.order("MAX(#{category_search_priority}) DESC")
|
||||
else
|
||||
posts.order("#{category_search_priority} DESC")
|
||||
end
|
||||
|
||||
if @term.present? && exact_rank
|
||||
posts =
|
||||
if aggregate_search
|
||||
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
|
||||
else
|
||||
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
|
||||
end
|
||||
end
|
||||
|
||||
data_ranking =
|
||||
if @term.blank?
|
||||
"(#{category_priority_weights})"
|
||||
|
@ -1179,9 +1194,9 @@ class Search
|
|||
|
||||
posts =
|
||||
if aggregate_search
|
||||
posts.order("MAX(#{category_search_priority}) DESC", "MAX(#{data_ranking}) DESC")
|
||||
posts.order("MAX(#{data_ranking}) DESC")
|
||||
else
|
||||
posts.order("#{category_search_priority} DESC", "#{data_ranking} DESC")
|
||||
posts.order("#{data_ranking} DESC")
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1211,6 +1226,17 @@ class Search
|
|||
posts.limit(limit)
|
||||
end
|
||||
|
||||
# Builds the SQL `TS_RANK_CD(...)` expression that ranks
# `post_search_data.search_data` against the tsquery for the current @term.
#
# weight_filter - forwarded to ts_query as its weight filter
# prefix_match  - forwarded to ts_query (defaults to true)
#
# When SiteSetting.search_ranking_weights is present it is emitted as the
# quoted first argument. The last argument is
# SiteSetting.search_ranking_normalization combined with 32 via `|`.
#
# NOTE(review): when @term is blank the tsquery slot is left empty but the
# comma after it is still emitted, so the fragment does not look like valid
# SQL on its own; presumably callers only use it when @term is present - confirm.
def ts_rank_cd(weight_filter:, prefix_match: true)
|
||||
<<~SQL
|
||||
TS_RANK_CD(
|
||||
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""}
|
||||
post_search_data.search_data,
|
||||
#{@term.blank? ? "" : ts_query(weight_filter: weight_filter, prefix_match: prefix_match)},
|
||||
#{SiteSetting.search_ranking_normalization}|32
|
||||
)
|
||||
SQL
|
||||
end
|
||||
|
||||
def categories_ignored(posts)
|
||||
posts.where(<<~SQL, Searchable::PRIORITIES[:ignore])
|
||||
(categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
|
||||
|
@ -1225,8 +1251,11 @@ class Search
|
|||
self.class.default_ts_config
|
||||
end
|
||||
|
||||
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil)
|
||||
to_tsquery(ts_config: ts_config, term: set_tsquery_weight_filter(term, weight_filter))
|
||||
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil, prefix_match: true)
|
||||
to_tsquery(
|
||||
ts_config: ts_config,
|
||||
term: set_tsquery_weight_filter(term, weight_filter, prefix_match: prefix_match),
|
||||
)
|
||||
end
|
||||
|
||||
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
|
||||
|
@ -1237,8 +1266,8 @@ class Search
|
|||
tsquery
|
||||
end
|
||||
|
||||
def self.set_tsquery_weight_filter(term, weight_filter)
|
||||
"'#{self.escape_string(term)}':*#{weight_filter}"
|
||||
def self.set_tsquery_weight_filter(term, weight_filter, prefix_match: true)
|
||||
"'#{self.escape_string(term)}':#{prefix_match ? "*" : ""}#{weight_filter}"
|
||||
end
|
||||
|
||||
def self.escape_string(term)
|
||||
|
@ -1251,11 +1280,16 @@ class Search
|
|||
PG::Connection.escape_string(term).gsub('\\', '\\\\\\')
|
||||
end
|
||||
|
||||
def ts_query(ts_config = nil, weight_filter: nil)
|
||||
def ts_query(ts_config = nil, weight_filter: nil, prefix_match: true)
|
||||
@ts_query_cache ||= {}
|
||||
@ts_query_cache[
|
||||
"#{ts_config || default_ts_config} #{@term} #{weight_filter}"
|
||||
] ||= Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter)
|
||||
"#{ts_config || default_ts_config} #{@term} #{weight_filter} #{prefix_match}"
|
||||
] ||= Search.ts_query(
|
||||
term: @term,
|
||||
ts_config: ts_config,
|
||||
weight_filter: weight_filter,
|
||||
prefix_match: prefix_match,
|
||||
)
|
||||
end
|
||||
|
||||
def wrap_rows(query)
|
||||
|
|
|
@ -2603,6 +2603,35 @@ RSpec.describe Search do
|
|||
end
|
||||
end
|
||||
|
||||
context "when prioritize_exact_search_match is enabled" do
|
||||
before { SearchIndexer.enable }
|
||||
|
||||
after { SearchIndexer.disable }
|
||||
|
||||
it "correctly ranks topics" do
|
||||
SiteSetting.prioritize_exact_search_title_match = true
|
||||
|
||||
topic1 = Fabricate(:topic, title: "saml saml saml is the best")
|
||||
post1 = Fabricate(:post, topic: topic1, raw: "this topic is a story about saml")
|
||||
|
||||
topic2 = Fabricate(:topic, title: "sam has ideas about lots of things")
|
||||
post2 = Fabricate(:post, topic: topic2, raw: "this topic is not about saml saml saml")
|
||||
|
||||
topic3 = Fabricate(:topic, title: "jane has ideas about lots of things")
|
||||
post3 = Fabricate(:post, topic: topic3, raw: "sam sam sam sam lets add sams")
|
||||
|
||||
SearchIndexer.index(post1, force: true)
|
||||
SearchIndexer.index(post2, force: true)
|
||||
SearchIndexer.index(post3, force: true)
|
||||
|
||||
result = Search.execute("sam")
|
||||
expect(result.posts.length).to eq(3)
|
||||
|
||||
# the exact title match (topic2) should rank first, then the prefix-only title match (topic1), then the body-only match (topic3)
|
||||
expect(result.posts.pluck(:id)).to eq([post2.id, post1.id, post3.id])
|
||||
end
|
||||
end
|
||||
|
||||
context "when max_duplicate_search_index_terms limits duplication" do
|
||||
before { SearchIndexer.enable }
|
||||
|
||||
|
|
Loading…
Reference in New Issue