DEV: adjustments to hot algorithm (#25517)

1. Serial likers will just like a bunch of posts on the same topic, which
heavily inflates the hot score. To avoid artificial "heat" generated by one user,
only count each user's first like per topic within the recent_cutoff range.

2. When looking at recent topics, prefer "unique likers"; defer to total likes on
older topics because we do not have an easy count of unique likers for them.

3. Stop taking 1 off like_count; it is not needed. Platforms like Reddit
allow you to like your own post, so they need to remove it.
This commit is contained in:
Sam 2024-02-01 17:11:40 +11:00 committed by GitHub
parent 969ab0fd6e
commit 690ff4499c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 11 deletions

View File

@ -66,7 +66,7 @@ class TopicHotScore < ActiveRecord::Base
t.id AS topic_id, t.id AS topic_id,
COUNT(DISTINCT p.user_id) AS unique_participants, COUNT(DISTINCT p.user_id) AS unique_participants,
( (
SELECT COUNT(*) SELECT COUNT(distinct pa.user_id)
FROM post_actions pa FROM post_actions pa
JOIN posts p2 ON p2.id = pa.post_id JOIN posts p2 ON p2.id = pa.post_id
WHERE p2.topic_id = t.id WHERE p2.topic_id = t.id
@ -100,7 +100,10 @@ class TopicHotScore < ActiveRecord::Base
# we need an extra index for this # we need an extra index for this
DB.exec(<<~SQL, args) DB.exec(<<~SQL, args)
UPDATE topic_hot_scores ths UPDATE topic_hot_scores ths
SET score = (topics.like_count - 1) / SET score = (
CASE WHEN topics.created_at > :recent_cutoff
THEN ths.recent_likes ELSE topics.like_count END
) /
(EXTRACT(EPOCH FROM (:now - topics.created_at)) / 3600 + 2) ^ :gravity (EXTRACT(EPOCH FROM (:now - topics.created_at)) / 3600 + 2) ^ :gravity
+ +
CASE WHEN ths.recent_first_bumped_at IS NULL THEN 0 ELSE CASE WHEN ths.recent_first_bumped_at IS NULL THEN 0 ELSE

View File

@ -4,6 +4,7 @@ RSpec.describe TopicHotScore do
describe ".update_scores" do describe ".update_scores" do
fab!(:user) fab!(:user)
fab!(:user2) { Fabricate(:user) } fab!(:user2) { Fabricate(:user) }
fab!(:user3) { Fabricate(:user) }
it "can correctly update like counts and post counts and account for activity" do it "can correctly update like counts and post counts and account for activity" do
freeze_time freeze_time
@ -24,35 +25,57 @@ RSpec.describe TopicHotScore do
PostActionCreator.like(user2, new_reply) PostActionCreator.like(user2, new_reply)
PostActionCreator.like(user, newer_reply) PostActionCreator.like(user, newer_reply)
# user 3 likes two posts, but we should only count 1 of them
# this prevents a single user from trivially inflating hot scores
PostActionCreator.like(user3, new_reply)
PostActionCreator.like(user3, newer_reply)
TopicHotScore.update_scores TopicHotScore.update_scores
hot_scoring = TopicHotScore.find_by(topic_id: topic.id) hot_scoring = TopicHotScore.find_by(topic_id: topic.id)
expect(hot_scoring.recent_likes).to eq(2) expect(hot_scoring.recent_likes).to eq(3)
expect(hot_scoring.recent_posters).to eq(2) expect(hot_scoring.recent_posters).to eq(2)
expect(hot_scoring.recent_first_bumped_at).to eq_time(new_reply.created_at) expect(hot_scoring.recent_first_bumped_at).to eq_time(new_reply.created_at)
expect(hot_scoring.score).to be_within(0.001).of(1.219) expect(hot_scoring.score).to be_within(0.001).of(1.771)
expect(TopicHotScore.find_by(topic_id: -1).recent_likes).to eq(0) expect(TopicHotScore.find_by(topic_id: -1).recent_likes).to eq(0)
end end
it "prefers recent_likes to topic like count for recent topics" do
freeze_time
topic = Fabricate(:topic, created_at: 1.hour.ago)
post = Fabricate(:post, topic: topic, created_at: 1.minute.ago)
PostActionCreator.like(user, post)
TopicHotScore.update_scores
score = TopicHotScore.find_by(topic_id: topic.id).score
topic.update!(like_count: 100)
TopicHotScore.update_scores
expect(TopicHotScore.find_by(topic_id: topic.id).score).to be_within(0.001).of(score)
end
it "can correctly set scores for topics" do it "can correctly set scores for topics" do
freeze_time freeze_time
topic1 = Fabricate(:topic, like_count: 3, created_at: 1.hour.ago) topic1 = Fabricate(:topic, like_count: 3, created_at: 2.weeks.ago)
topic2 = Fabricate(:topic, like_count: 10, created_at: 3.hour.ago) topic2 = Fabricate(:topic, like_count: 10, created_at: 2.weeks.ago)
TopicHotScore.update_scores TopicHotScore.update_scores
expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.535) expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.002)
expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(1.304) expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.009)
freeze_time(2.hours.from_now) freeze_time(6.weeks.from_now)
TopicHotScore.update_scores TopicHotScore.update_scores
expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.289) expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.0001).of(0.0005)
expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.871) expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.001)
end end
end end
end end