PERF: Improve performance of `most_replied_to_users` (#26373)

This PR improves the performance of the `most_replied_to_users` method on the `UserSummary` model.

### Old Query
```ruby
    post_query
      .joins(
        "JOIN posts replies ON posts.topic_id = replies.topic_id AND posts.reply_to_post_number = replies.post_number",
      )
      # We are removing replies by @user, but we can simplify this by getting the using the user_id on the posts.
      .where("replies.user_id <> ?", @user.id)
      .group("replies.user_id")
      .order("COUNT(*) DESC")
      .limit(MAX_SUMMARY_RESULTS)
      .pluck("replies.user_id, COUNT(*)")
      .each { |r| replied_users[r[0]] = r[1] }
```
 
### Old Query with corrections

```ruby
post_query
  .joins(
    "JOIN posts replies ON posts.topic_id = replies.topic_id AND replies.reply_to_post_number = posts.post_number",
  )
  # Remove replies by @user but instead look on loaded posts (we do this so we don't count self replies)
  .where("replies.user_id <> posts.user_id")
  .group("replies.user_id")
  .order("COUNT(*) DESC")
  .limit(MAX_SUMMARY_RESULTS)
  .pluck("replies.user_id, COUNT(*)")
  .each { |r| replied_users[r[0]] = r[1] }
```

### New Query
```ruby
    post_query
      .joins(
        "JOIN posts replies ON posts.topic_id = replies.topic_id AND posts.reply_to_post_number = replies.post_number",
      )
      # Only include regular posts in our joins, this makes sure we don't have the bloat of loading private messages
      .joins(
        "JOIN topics ON replies.topic_id = topics.id AND topics.archetype <> 'private_message'",
      )
      # Only include visible post types, so exclude posts like whispers, etc
      .joins(
        "AND replies.post_type IN (#{Topic.visible_post_types(@user, include_moderator_actions: false).join(",")})",
      )
      .where("replies.user_id <> posts.user_id")
      .group("replies.user_id")
      .order("COUNT(*) DESC")
      .limit(MAX_SUMMARY_RESULTS)
      .pluck("replies.user_id, COUNT(*)")
      .each { |r| replied_users[r[0]] = r[1] }
```

# Conclusion

`most_replied_to_users` was untested, so I introduced a test for the logic, and have confirmed that it passes on both the new query **AND** the old query. 

Thank you @danielwaterworth for the debugging assistance.
This commit is contained in:
Isaac Janzen 2024-04-03 14:20:54 -06:00 committed by GitHub
parent 9dc6325821
commit db10dd5319
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 48 additions and 1 deletions

View File

@ -88,7 +88,13 @@ class UserSummary
.joins( .joins(
"JOIN posts replies ON posts.topic_id = replies.topic_id AND posts.reply_to_post_number = replies.post_number", "JOIN posts replies ON posts.topic_id = replies.topic_id AND posts.reply_to_post_number = replies.post_number",
) )
.where("replies.user_id <> ?", @user.id) .joins(
"JOIN topics ON replies.topic_id = topics.id AND topics.archetype <> 'private_message'",
)
.joins(
"AND replies.post_type IN (#{Topic.visible_post_types(@user, include_moderator_actions: false).join(",")})",
)
.where("replies.user_id <> posts.user_id")
.group("replies.user_id") .group("replies.user_id")
.order("COUNT(*) DESC") .order("COUNT(*) DESC")
.limit(MAX_SUMMARY_RESULTS) .limit(MAX_SUMMARY_RESULTS)

View File

@ -90,4 +90,45 @@ RSpec.describe UserSummary do
expect(summary.top_categories.first[:topic_count]).to eq(1) expect(summary.top_categories.first[:topic_count]).to eq(1)
expect(summary.top_categories.first[:post_count]).to eq(1) expect(summary.top_categories.first[:post_count]).to eq(1)
end end
it "returns the most replied to users" do
topic1 = create_post.topic
topic1_post = create_post(topic: topic1)
topic1_reply =
create_post(topic: topic1, reply_to_post_number: topic1_post.post_number, user: topic1.user)
# Create a second topic by the same user as topic1
topic2 = create_post(user: topic1.user).topic
topic2_post = create_post(topic: topic2)
topic2_reply =
create_post(topic: topic2, reply_to_post_number: topic2_post.post_number, user: topic2.user)
# Don't include replies to whispers
topic3 = create_post(user: topic1.user).topic
topic3_post = create_post(topic: topic3, post_type: Post.types[:whisper])
topic3_reply =
create_post(topic: topic3, reply_to_post_number: topic3_post.post_number, user: topic3.user)
# Don't include replies to private messages
replied_to_user = Fabricate(:user)
topic4 =
create_post(
user: topic1.user,
archetype: Archetype.private_message,
target_usernames: [replied_to_user.username],
).topic
topic4_post = create_post(topic: topic4, user: replied_to_user)
topic4_reply =
create_post(topic: topic4, reply_to_post_number: topic4_post.post_number, user: topic4.user)
user_summary = UserSummary.new(topic1.user, Guardian.new(topic1.user))
most_replied_to_users = user_summary.most_replied_to_users
counts =
most_replied_to_users
.index_by { |user_with_count| user_with_count[:id] }
.transform_values { |c| c[:count] }
expect(counts).to eq({ topic1_post.user_id => 1, topic2_post.user_id => 1 })
end
end end