FIX: Search by relevance may return incorrect post number.
Follow up to d8c796bc4
.
Note that his change increases query time by around 40% in the following
benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance
search is accurate.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
18.040 (± 0.0%) i/s - 181.000 in 10.035241s
current aggregate search query with proper ranking
12.992 (± 0.0%) i/s - 130.000 in 10.007214s
Comparison:
current aggregate search query: 18.0 i/s
current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower
```
This commit is contained in:
parent
97f581e1df
commit
5bf0a0893b
|
@ -896,10 +896,7 @@ class Search
|
||||||
|
|
||||||
if aggregate_search
|
if aggregate_search
|
||||||
aggregate_relation = Post.unscoped
|
aggregate_relation = Post.unscoped
|
||||||
.select(
|
.select("subquery.topic_id id")
|
||||||
"subquery.topic_id id",
|
|
||||||
"(ARRAY_AGG(subquery.post_number))[1] post_number",
|
|
||||||
)
|
|
||||||
.group("subquery.topic_id")
|
.group("subquery.topic_id")
|
||||||
|
|
||||||
posts = posts.select(posts.arel.projections)
|
posts = posts.select(posts.arel.projections)
|
||||||
|
@ -910,7 +907,10 @@ class Search
|
||||||
|
|
||||||
if aggregate_search
|
if aggregate_search
|
||||||
aggregate_relation = aggregate_relation
|
aggregate_relation = aggregate_relation
|
||||||
.select("MAX(subquery.created_at) created_at")
|
.select(
|
||||||
|
"(ARRAY_AGG(subquery.post_number ORDER BY subquery.created_at DESC))[1] post_number",
|
||||||
|
"MAX(subquery.created_at) created_at"
|
||||||
|
)
|
||||||
.order("created_at DESC")
|
.order("created_at DESC")
|
||||||
end
|
end
|
||||||
elsif @order == :latest_topic
|
elsif @order == :latest_topic
|
||||||
|
@ -920,7 +920,10 @@ class Search
|
||||||
posts = posts.select("topics.created_at topic_created_at")
|
posts = posts.select("topics.created_at topic_created_at")
|
||||||
|
|
||||||
aggregate_relation = aggregate_relation
|
aggregate_relation = aggregate_relation
|
||||||
.select("MAX(subquery.topic_created_at) topic_created_at")
|
.select(
|
||||||
|
"(ARRAY_AGG(subquery.post_number ORDER BY subquery.topic_created_at DESC))[1] post_number",
|
||||||
|
"MAX(subquery.topic_created_at) topic_created_at"
|
||||||
|
)
|
||||||
.order("topic_created_at DESC")
|
.order("topic_created_at DESC")
|
||||||
end
|
end
|
||||||
elsif @order == :views
|
elsif @order == :views
|
||||||
|
@ -930,7 +933,10 @@ class Search
|
||||||
posts = posts.select("topics.views topic_views")
|
posts = posts.select("topics.views topic_views")
|
||||||
|
|
||||||
aggregate_relation = aggregate_relation
|
aggregate_relation = aggregate_relation
|
||||||
.select("MAX(subquery.topic_views) topic_views")
|
.select(
|
||||||
|
"(ARRAY_AGG(subquery.post_number ORDER BY subquery.topic_views DESC))[1] post_number",
|
||||||
|
"MAX(subquery.topic_views) topic_views"
|
||||||
|
)
|
||||||
.order("topic_views DESC")
|
.order("topic_views DESC")
|
||||||
end
|
end
|
||||||
elsif @order == :likes
|
elsif @order == :likes
|
||||||
|
@ -976,7 +982,10 @@ class Search
|
||||||
.order("rank DESC", "topic_bumped_at DESC")
|
.order("rank DESC", "topic_bumped_at DESC")
|
||||||
|
|
||||||
aggregate_relation = aggregate_relation
|
aggregate_relation = aggregate_relation
|
||||||
.select("MAX(subquery.rank) rank", "MAX(subquery.topic_bumped_at) topic_bumped_at")
|
.select(
|
||||||
|
"(ARRAY_AGG(subquery.post_number ORDER BY subquery.rank DESC, subquery.topic_bumped_at DESC))[1] post_number",
|
||||||
|
"MAX(subquery.rank) rank", "MAX(subquery.topic_bumped_at) topic_bumped_at"
|
||||||
|
)
|
||||||
.order("rank DESC", "topic_bumped_at DESC")
|
.order("rank DESC", "topic_bumped_at DESC")
|
||||||
else
|
else
|
||||||
posts = posts.order("#{data_ranking} DESC", "topics.bumped_at DESC")
|
posts = posts.order("#{data_ranking} DESC", "topics.bumped_at DESC")
|
||||||
|
|
|
@ -447,9 +447,13 @@ describe Search do
|
||||||
|
|
||||||
it 'aggregates searches in a topic by returning the post with the highest rank' do
|
it 'aggregates searches in a topic by returning the post with the highest rank' do
|
||||||
post = Fabricate(:post, topic: topic, raw: "this is a play post")
|
post = Fabricate(:post, topic: topic, raw: "this is a play post")
|
||||||
post2 = Fabricate(:post, topic: topic, raw: "play playing played")
|
post2 = Fabricate(:post, topic: topic, raw: "play play playing played play")
|
||||||
post3 = Fabricate(:post, raw: "this is a play post")
|
post3 = Fabricate(:post, raw: "this is a play post")
|
||||||
|
|
||||||
|
5.times do
|
||||||
|
Fabricate(:post, topic: topic, raw: "play playing played")
|
||||||
|
end
|
||||||
|
|
||||||
results = Search.execute('play')
|
results = Search.execute('play')
|
||||||
|
|
||||||
expect(results.posts.map(&:id)).to eq([
|
expect(results.posts.map(&:id)).to eq([
|
||||||
|
|
Loading…
Reference in New Issue