From 4b3f65bb26c9d72f85b10f69956cd8ab75978d0a Mon Sep 17 00:00:00 2001 From: Alan Guo Xiang Tan Date: Mon, 1 Feb 2021 13:40:06 +0800 Subject: [PATCH] FIX: Select earliest post when aggregating posts in a topic for search. This is a revert of https://github.com/discourse/discourse/commit/d8c796bc44d84a9e177737153793e5acc276adec and https://github.com/discourse/discourse/commit/5bf0a0893b3e508383e1fe2558cc54e12147a4f3. Linking to the post within a topic that has the highest rank was confusing users and hard to explain because ranking is determined via the PG ranking function. See the following meta topics for the complaints after we switch to the new ordering: 1. https://meta.discourse.org/t/title-search-not-working-as-expected/157737 2. https://meta.discourse.org/t/search-results-should-prioritize-first-post-in-topic-when-title-matches-search-term/175154 --- lib/search.rb | 126 +++++++++++---------------------- spec/components/search_spec.rb | 10 +-- 2 files changed, 49 insertions(+), 87 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 90de3f7c6a0..934660f06d0 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -986,53 +986,30 @@ class Search posts end - if aggregate_search - aggregate_relation = Post.unscoped - .select("subquery.topic_id id") - .group("subquery.topic_id") - - posts = posts.select(posts.arel.projections) - end - if @order == :latest - posts = posts.reorder("posts.created_at DESC") - if aggregate_search - aggregate_relation = aggregate_relation - .select( - "MAX(subquery.post_number) post_number", - "MAX(subquery.created_at) created_at" - ) - .order("created_at DESC") + posts = posts.order("MAX(posts.created_at) DESC") + else + posts = posts.reorder("posts.created_at DESC") end elsif @order == :latest_topic - posts = posts.order("topics.created_at DESC") - if aggregate_search - posts = posts.select("topics.created_at topic_created_at") - - aggregate_relation = aggregate_relation - .select( - "MIN(subquery.post_number) post_number", - "MAX(subquery.topic_created_at) topic_created_at" - ) - .order("topic_created_at DESC") + posts = posts.order("MAX(topics.created_at) DESC") + else + posts = posts.order("topics.created_at DESC") end elsif @order == :views - posts = posts.order("topics.views DESC") - if aggregate_search - posts = posts.select("topics.views topic_views") - - aggregate_relation = aggregate_relation - .select( - "MIN(subquery.post_number) post_number", - "MAX(subquery.topic_views) topic_views" - ) - .order("topic_views DESC") + posts = posts.order("MAX(topics.views) DESC") + else + posts = posts.order("topics.views DESC") end elsif @order == :likes - posts = posts.order("posts.like_count DESC") + if aggregate_search + posts = posts.order("MAX(posts.like_count) DESC") + else + posts = posts.order("posts.like_count DESC") + end else rank = <<~SQL TS_RANK_CD( @@ -1069,31 +1046,22 @@ class Search "(#{rank} * #{category_priority_weights})" end - if aggregate_search - posts = posts.select("#{data_ranking} rank", "topics.bumped_at topic_bumped_at") - .order("rank DESC", "topic_bumped_at DESC") + posts = + if aggregate_search + posts.order("MAX(#{data_ranking}) DESC") + else + posts.order("#{data_ranking} DESC") + end - aggregate_relation = aggregate_relation - .select( - "(ARRAY_AGG(subquery.post_number ORDER BY subquery.rank DESC, subquery.topic_bumped_at DESC))[1] post_number", - "MAX(subquery.rank) rank", "MAX(subquery.topic_bumped_at) topic_bumped_at" - ) - .order("rank DESC", "topic_bumped_at DESC") + posts = posts.order("topics.bumped_at DESC") + end + + posts = + if secure_category_ids.present? + posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", secure_category_ids).references(:categories) else - posts = posts.order("#{data_ranking} DESC", "topics.bumped_at DESC") + posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(:categories) end - end - - if secure_category_ids.present? - posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", secure_category_ids).references(:categories) - else - posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(:categories) - end - - if aggregate_search - posts = yield(posts) if block_given? - posts = aggregate_relation.from(posts) - end if @order advanced_order = Search.advanced_orders&.fetch(@order, nil) @@ -1169,36 +1137,28 @@ class Search Search.min_post_id end - if @order == :likes - # likes are a pain to aggregate so skip - query = posts_query(limit, **default_opts).select('topics.id', 'posts.post_number') + min_or_max = @order == :latest ? "max" : "min" - if min_id > 0 - low_set = query.dup.where("post_search_data.post_id < #{min_id}") - high_set = query.where("post_search_data.post_id >= #{min_id}") - - { default: wrap_rows(high_set), remaining: wrap_rows(low_set) } + query = + if @order == :likes + # likes are a pain to aggregate so skip + posts_query(limit, type_filter: opts[:type_filter]) + .select('topics.id', "posts.post_number") else - { default: wrap_rows(query) } - end - else - query = posts_query(limit, **default_opts, aggregate_search: true) do |posts| - if min_id > 0 - posts.select("post_search_data.post_id post_search_data_post_id") - else - posts - end + posts_query(limit, aggregate_search: true, type_filter: opts[:type_filter]) + .select('topics.id', "#{min_or_max}(posts.post_number) post_number") + .group('topics.id') end - if min_id > 0 - low_set = query.dup.where("subquery.post_search_data_post_id < #{min_id}") - high_set = query.where("subquery.post_search_data_post_id >= #{min_id}") + if min_id > 0 + low_set = query.dup.where("post_search_data.post_id < #{min_id}") + high_set = query.where("post_search_data.post_id >= #{min_id}") - { default: wrap_rows(high_set), remaining: wrap_rows(low_set) } - else - { default: wrap_rows(query) } - end + return { default: wrap_rows(high_set), remaining: wrap_rows(low_set) } end + + # double wrapping so we get correct row numbers + { default: wrap_rows(query) } end def aggregate_posts(post_sql) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 16a583a2a5b..be6e051e890 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -601,8 +601,8 @@ describe Search do expect(result.posts.pluck(:id)).to eq([post2.id, post.id]) end - it 'aggregates searches in a topic by returning the post with the highest rank' do - _post = Fabricate(:post, topic: topic, raw: "this is a play post") + it 'aggregates searches in a topic by returning the post with the lowest post number' do + post = Fabricate(:post, topic: topic, raw: "this is a play post") post2 = Fabricate(:post, topic: topic, raw: "play play playing played play") post3 = Fabricate(:post, raw: "this is a play post") @@ -613,7 +613,7 @@ describe Search do results = Search.execute('play') expect(results.posts.map(&:id)).to eq([ - post2.id, + post.id, post3.id ]) end @@ -1892,9 +1892,11 @@ describe Search do it 'allows to define custom order' do expect(Search.new("advanced").execute.posts).to eq([post1, post0]) + Search.advanced_order(:chars) do |posts| - posts.reorder("(SELECT LENGTH(raw) FROM posts WHERE posts.topic_id = subquery.topic_id) DESC") + posts.reorder("MAX(LENGTH(posts.raw)) DESC") end + expect(Search.new("advanced order:chars").execute.posts).to eq([post0, post1]) end end