From c5345d0e5402b36f896fe430127926e306d1e197 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 31 Jan 2023 16:34:01 +1100 Subject: [PATCH] FEATURE: prioritize_exact_search_title_match hidden setting (#20089) The new `prioritize_exact_search_title_match` can be used to force the search algorithm to prioritize exact term matches in the title when ranking results. This is scoped narrowly to titles for cases such as a topic titled: "organisation chart" and a search of "org chart". If we scoped this wider, all discussion about "org chart" would float to the top and leave a very common title de-prioritized. This is a hidden site setting and it has some performance impact due to double ranking. That said, the performance impact is somewhat mitigated because ranking on title alone is a very cheap operation. --- config/site_settings.yml | 3 ++ lib/search.rb | 68 ++++++++++++++++++++++++++++++---------- spec/lib/search_spec.rb | 29 +++++++++++++++++ 3 files changed, 83 insertions(+), 17 deletions(-) diff --git a/config/site_settings.yml b/config/site_settings.yml index 1963eef2620..d0d1fd371f2 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -2194,6 +2194,9 @@ backups: client: true search: + prioritize_exact_search_title_match: + default: false + hidden: true max_duplicate_search_index_terms: default: -1 hidden: true diff --git a/lib/search.rb b/lib/search.rb index 994e37fadf8..2afd2d1bae0 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1132,14 +1132,13 @@ class Search posts = posts.order("posts.like_count DESC") end elsif !is_topic_search - rank = <<~SQL - TS_RANK_CD( - #{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""} - post_search_data.search_data, - #{@term.blank? ? 
"" : ts_query(weight_filter: weights)}, - #{SiteSetting.search_ranking_normalization}|32 - ) - SQL + exact_rank = nil + + if SiteSetting.prioritize_exact_search_title_match + exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false) + end + + rank = ts_rank_cd(weight_filter: weights) if type_filter != "private_messages" category_search_priority = <<~SQL @@ -1170,6 +1169,22 @@ class Search ) SQL + posts = + if aggregate_search + posts.order("MAX(#{category_search_priority}) DESC") + else + posts.order("#{category_search_priority} DESC") + end + + if @term.present? && exact_rank + posts = + if aggregate_search + posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC") + else + posts.order("#{exact_rank} * #{category_priority_weights} DESC") + end + end + data_ranking = if @term.blank? "(#{category_priority_weights})" @@ -1179,9 +1194,9 @@ class Search posts = if aggregate_search - posts.order("MAX(#{category_search_priority}) DESC", "MAX(#{data_ranking}) DESC") + posts.order("MAX(#{data_ranking}) DESC") else - posts.order("#{category_search_priority} DESC", "#{data_ranking} DESC") + posts.order("#{data_ranking} DESC") end end @@ -1211,6 +1226,17 @@ class Search posts.limit(limit) end + def ts_rank_cd(weight_filter:, prefix_match: true) + <<~SQL + TS_RANK_CD( + #{SiteSetting.search_ranking_weights.present? ? "'#{SiteSetting.search_ranking_weights}'," : ""} + post_search_data.search_data, + #{@term.blank? ? "" : ts_query(weight_filter: weight_filter, prefix_match: prefix_match)}, + #{SiteSetting.search_ranking_normalization}|32 + ) + SQL + end + def categories_ignored(posts) posts.where(<<~SQL, Searchable::PRIORITIES[:ignore]) (categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?) 
@@ -1225,8 +1251,11 @@ class Search self.class.default_ts_config end - def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil) - to_tsquery(ts_config: ts_config, term: set_tsquery_weight_filter(term, weight_filter)) + def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil, prefix_match: true) + to_tsquery( + ts_config: ts_config, + term: set_tsquery_weight_filter(term, weight_filter, prefix_match: prefix_match), + ) end def self.to_tsquery(ts_config: nil, term:, joiner: nil) @@ -1237,8 +1266,8 @@ class Search tsquery end - def self.set_tsquery_weight_filter(term, weight_filter) - "'#{self.escape_string(term)}':*#{weight_filter}" + def self.set_tsquery_weight_filter(term, weight_filter, prefix_match: true) + "'#{self.escape_string(term)}':#{prefix_match ? "*" : ""}#{weight_filter}" end def self.escape_string(term) @@ -1251,11 +1280,16 @@ class Search PG::Connection.escape_string(term).gsub('\\', '\\\\\\') end - def ts_query(ts_config = nil, weight_filter: nil) + def ts_query(ts_config = nil, weight_filter: nil, prefix_match: true) @ts_query_cache ||= {} @ts_query_cache[ - "#{ts_config || default_ts_config} #{@term} #{weight_filter}" - ] ||= Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter) + "#{ts_config || default_ts_config} #{@term} #{weight_filter} #{prefix_match}" + ] ||= Search.ts_query( + term: @term, + ts_config: ts_config, + weight_filter: weight_filter, + prefix_match: prefix_match, + ) end def wrap_rows(query) diff --git a/spec/lib/search_spec.rb b/spec/lib/search_spec.rb index 05c1f9b9870..758d72e4f0a 100644 --- a/spec/lib/search_spec.rb +++ b/spec/lib/search_spec.rb @@ -2603,6 +2603,35 @@ RSpec.describe Search do end end + context "when prioritize_exact_search_match is enabled" do + before { SearchIndexer.enable } + + after { SearchIndexer.disable } + + it "correctly ranks topics" do + SiteSetting.prioritize_exact_search_title_match = true + + topic1 = Fabricate(:topic, title: "saml saml 
saml is the best") + post1 = Fabricate(:post, topic: topic1, raw: "this topic is a story about saml") + + topic2 = Fabricate(:topic, title: "sam has ideas about lots of things") + post2 = Fabricate(:post, topic: topic2, raw: "this topic is not about saml saml saml") + + topic3 = Fabricate(:topic, title: "jane has ideas about lots of things") + post3 = Fabricate(:post, topic: topic3, raw: "sam sam sam sam lets add sams") + + SearchIndexer.index(post1, force: true) + SearchIndexer.index(post2, force: true) + SearchIndexer.index(post3, force: true) + + result = Search.execute("sam") + expect(result.posts.length).to eq(3) + + # title match should win cause we limited duplication + expect(result.posts.pluck(:id)).to eq([post2.id, post1.id, post3.id]) + end + end + context "when max_duplicate_search_index_terms limits duplication" do before { SearchIndexer.enable }