FEATURE: prioritize_exact_search_title_match hidden setting (#20089)
The new hidden `prioritize_exact_search_title_match` site setting can be used to force the search algorithm to prioritize exact term matches in the title when ranking results. This is scoped narrowly to titles, for cases such as a topic titled "organisation chart" and a search for "org chart". If we scoped this more widely, all discussion about "org chart" would float to the top and leave a very common title de-prioritized. This is a hidden site setting, and it has some performance impact due to double ranking. That said, the performance impact is somewhat mitigated because ranking on title alone is a very cheap operation.
This commit is contained in:
parent
f31f0b70f8
commit
c5345d0e54
|
@ -2194,6 +2194,9 @@ backups:
|
||||||
client: true
|
client: true
|
||||||
|
|
||||||
search:
|
search:
|
||||||
|
prioritize_exact_search_title_match:
|
||||||
|
default: false
|
||||||
|
hidden: true
|
||||||
max_duplicate_search_index_terms:
|
max_duplicate_search_index_terms:
|
||||||
default: -1
|
default: -1
|
||||||
hidden: true
|
hidden: true
|
||||||
|
|
|
@ -1132,14 +1132,13 @@ class Search
|
||||||
posts = posts.order("posts.like_count DESC")
|
posts = posts.order("posts.like_count DESC")
|
||||||
end
|
end
|
||||||
elsif !is_topic_search
|
elsif !is_topic_search
|
||||||
rank = <<~SQL
|
exact_rank = nil
|
||||||
TS_RANK_CD(
|
|
||||||
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""}
|
if SiteSetting.prioritize_exact_search_title_match
|
||||||
post_search_data.search_data,
|
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
|
||||||
#{@term.blank? ? "" : ts_query(weight_filter: weights)},
|
end
|
||||||
#{SiteSetting.search_ranking_normalization}|32
|
|
||||||
)
|
rank = ts_rank_cd(weight_filter: weights)
|
||||||
SQL
|
|
||||||
|
|
||||||
if type_filter != "private_messages"
|
if type_filter != "private_messages"
|
||||||
category_search_priority = <<~SQL
|
category_search_priority = <<~SQL
|
||||||
|
@ -1170,6 +1169,22 @@ class Search
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
posts =
|
||||||
|
if aggregate_search
|
||||||
|
posts.order("MAX(#{category_search_priority}) DESC")
|
||||||
|
else
|
||||||
|
posts.order("#{category_search_priority} DESC")
|
||||||
|
end
|
||||||
|
|
||||||
|
if @term.present? && exact_rank
|
||||||
|
posts =
|
||||||
|
if aggregate_search
|
||||||
|
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
|
||||||
|
else
|
||||||
|
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
data_ranking =
|
data_ranking =
|
||||||
if @term.blank?
|
if @term.blank?
|
||||||
"(#{category_priority_weights})"
|
"(#{category_priority_weights})"
|
||||||
|
@ -1179,9 +1194,9 @@ class Search
|
||||||
|
|
||||||
posts =
|
posts =
|
||||||
if aggregate_search
|
if aggregate_search
|
||||||
posts.order("MAX(#{category_search_priority}) DESC", "MAX(#{data_ranking}) DESC")
|
posts.order("MAX(#{data_ranking}) DESC")
|
||||||
else
|
else
|
||||||
posts.order("#{category_search_priority} DESC", "#{data_ranking} DESC")
|
posts.order("#{data_ranking} DESC")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1211,6 +1226,17 @@ class Search
|
||||||
posts.limit(limit)
|
posts.limit(limit)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def ts_rank_cd(weight_filter:, prefix_match: true)
|
||||||
|
<<~SQL
|
||||||
|
TS_RANK_CD(
|
||||||
|
#{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""}
|
||||||
|
post_search_data.search_data,
|
||||||
|
#{@term.blank? ? "" : ts_query(weight_filter: weight_filter, prefix_match: prefix_match)},
|
||||||
|
#{SiteSetting.search_ranking_normalization}|32
|
||||||
|
)
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
def categories_ignored(posts)
|
def categories_ignored(posts)
|
||||||
posts.where(<<~SQL, Searchable::PRIORITIES[:ignore])
|
posts.where(<<~SQL, Searchable::PRIORITIES[:ignore])
|
||||||
(categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
|
(categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
|
||||||
|
@ -1225,8 +1251,11 @@ class Search
|
||||||
self.class.default_ts_config
|
self.class.default_ts_config
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil)
|
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil, prefix_match: true)
|
||||||
to_tsquery(ts_config: ts_config, term: set_tsquery_weight_filter(term, weight_filter))
|
to_tsquery(
|
||||||
|
ts_config: ts_config,
|
||||||
|
term: set_tsquery_weight_filter(term, weight_filter, prefix_match: prefix_match),
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
|
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
|
||||||
|
@ -1237,8 +1266,8 @@ class Search
|
||||||
tsquery
|
tsquery
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.set_tsquery_weight_filter(term, weight_filter)
|
def self.set_tsquery_weight_filter(term, weight_filter, prefix_match: true)
|
||||||
"'#{self.escape_string(term)}':*#{weight_filter}"
|
"'#{self.escape_string(term)}':#{prefix_match ? "*" : ""}#{weight_filter}"
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.escape_string(term)
|
def self.escape_string(term)
|
||||||
|
@ -1251,11 +1280,16 @@ class Search
|
||||||
PG::Connection.escape_string(term).gsub('\\', '\\\\\\')
|
PG::Connection.escape_string(term).gsub('\\', '\\\\\\')
|
||||||
end
|
end
|
||||||
|
|
||||||
def ts_query(ts_config = nil, weight_filter: nil)
|
def ts_query(ts_config = nil, weight_filter: nil, prefix_match: true)
|
||||||
@ts_query_cache ||= {}
|
@ts_query_cache ||= {}
|
||||||
@ts_query_cache[
|
@ts_query_cache[
|
||||||
"#{ts_config || default_ts_config} #{@term} #{weight_filter}"
|
"#{ts_config || default_ts_config} #{@term} #{weight_filter} #{prefix_match}"
|
||||||
] ||= Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter)
|
] ||= Search.ts_query(
|
||||||
|
term: @term,
|
||||||
|
ts_config: ts_config,
|
||||||
|
weight_filter: weight_filter,
|
||||||
|
prefix_match: prefix_match,
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def wrap_rows(query)
|
def wrap_rows(query)
|
||||||
|
|
|
@ -2603,6 +2603,35 @@ RSpec.describe Search do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when prioritize_exact_search_match is enabled" do
|
||||||
|
before { SearchIndexer.enable }
|
||||||
|
|
||||||
|
after { SearchIndexer.disable }
|
||||||
|
|
||||||
|
it "correctly ranks topics" do
|
||||||
|
SiteSetting.prioritize_exact_search_title_match = true
|
||||||
|
|
||||||
|
topic1 = Fabricate(:topic, title: "saml saml saml is the best")
|
||||||
|
post1 = Fabricate(:post, topic: topic1, raw: "this topic is a story about saml")
|
||||||
|
|
||||||
|
topic2 = Fabricate(:topic, title: "sam has ideas about lots of things")
|
||||||
|
post2 = Fabricate(:post, topic: topic2, raw: "this topic is not about saml saml saml")
|
||||||
|
|
||||||
|
topic3 = Fabricate(:topic, title: "jane has ideas about lots of things")
|
||||||
|
post3 = Fabricate(:post, topic: topic3, raw: "sam sam sam sam lets add sams")
|
||||||
|
|
||||||
|
SearchIndexer.index(post1, force: true)
|
||||||
|
SearchIndexer.index(post2, force: true)
|
||||||
|
SearchIndexer.index(post3, force: true)
|
||||||
|
|
||||||
|
result = Search.execute("sam")
|
||||||
|
expect(result.posts.length).to eq(3)
|
||||||
|
|
||||||
|
# exact title match should win because prioritize_exact_search_title_match orders by the exact title rank first
|
||||||
|
expect(result.posts.pluck(:id)).to eq([post2.id, post1.id, post3.id])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "when max_duplicate_search_index_terms limits duplication" do
|
context "when max_duplicate_search_index_terms limits duplication" do
|
||||||
before { SearchIndexer.enable }
|
before { SearchIndexer.enable }
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue