FEATURE: experiment with hot sort order (#25274)

This introduces a new experimental hot sort ordering. 

It attempts to float top conversations by prioritizing topics with lots of recent activity (likes and users responding).

The scheduled job that updates hot topic scores is disabled unless the hidden site setting `experimental_hot_topics` is enabled.

You can control "decay" with `hot_topics_gravity` and "recency" with `hot_topics_recent_days`.

Data is stored in the new `topic_hot_scores` table and you can check it out on the `/hot` route once 
enabled. 
---------

Co-authored-by: Penar Musaraj <pmusaraj@gmail.com>
This commit is contained in:
Sam 2024-01-17 13:01:04 +11:00 committed by GitHub
parent 1a752148a9
commit ebd3971533
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 259 additions and 2 deletions

View File

@ -261,6 +261,14 @@ class ListController < ApplicationController
render "list", formats: [:rss]
end
# Renders the "list" template as RSS for the hot topic list.
# The list comes from a TopicQuery constructed with a nil first argument,
# and the response is given a one-minute expiry.
def hot_feed
  discourse_expires_in(1.minute)

  query = TopicQuery.new(nil)
  @topic_list = query.list_hot

  render("list", formats: %i[rss])
end
def category_feed
guardian.ensure_can_see!(@category)
discourse_expires_in 1.minute

View File

@ -0,0 +1,11 @@
# frozen_string_literal: true
module Jobs
  # Scheduled job, declared to run every 10 minutes, that refreshes the
  # topic_hot_scores data. It does nothing unless the
  # `experimental_hot_topics` site setting is enabled.
  class UpdateTopicHotScores < ::Jobs::Scheduled
    every 10.minutes

    # @param args job arguments (not used by this job)
    def execute(args)
      return unless SiteSetting.experimental_hot_topics

      TopicHotScore.update_scores
    end
  end
end

View File

@ -0,0 +1,140 @@
# frozen_string_literal: true
# Per-topic "hot" ranking data, one row per topic in `topic_hot_scores`.
#
# The stored score is the sum of two decaying terms:
#
#   topics.like_count / (topic age in hours + 2) ^ gravity
#   + (recent_likes + recent_posters) / (hours since recent_first_bumped_at + 2) ^ gravity
#
# The second term is 0 while recent_first_bumped_at is NULL. `gravity` comes
# from SiteSetting.hot_topics_gravity, and "recent" means within the last
# SiteSetting.hot_topics_recent_days days.
class TopicHotScore < ActiveRecord::Base
  belongs_to :topic

  DEFAULT_BATCH_SIZE = 1000

  # Brings the topic_hot_scores table up to date using three SQL statements:
  #
  # 1. insert a zeroed row for topics that have none yet (at most `max` rows,
  #    most recently bumped topics first)
  # 2. refresh recent_likes / recent_posters / recent_first_bumped_at on every
  #    existing row (this statement has no LIMIT)
  # 3. recompute `score` for the `max` rows that currently rank highest
  #
  # @param max [Integer] row limit applied to steps 1 and 3
  def self.update_scores(max = DEFAULT_BATCH_SIZE)
    now = Time.zone.now

    # Bind values shared by all three statements.
    args = {
      now: now,
      gravity: SiteSetting.hot_topics_gravity,
      max: max,
      private_message: Archetype.private_message,
      recent_cutoff: now - SiteSetting.hot_topics_recent_days.days,
    }

    # Step 1: insert up to `max` records that are missing from the table.
    # Deleted topics and private messages are never given a row.
    DB.exec(<<~SQL, args)
      INSERT INTO topic_hot_scores (
        topic_id,
        score,
        recent_likes,
        recent_posters,
        created_at,
        updated_at
      )
      SELECT
        topics.id,
        0.0,
        0,
        0,
        :now,
        :now
      FROM topics
      LEFT OUTER JOIN topic_hot_scores ON topic_hot_scores.topic_id = topics.id
      WHERE topic_hot_scores.topic_id IS NULL
        AND topics.deleted_at IS NULL
        AND topics.archetype <> :private_message
        AND topics.created_at <= :now
      ORDER BY topics.bumped_at desc
      LIMIT :max
    SQL

    # Step 2: refresh the recent-activity columns on every row.
    # Because of the LEFT OUTER JOIN, rows whose topic has no qualifying recent
    # posts get recent_likes / recent_posters reset to 0 and keep whatever
    # recent_first_bumped_at they already had.
    DB.exec(<<~SQL, args)
      UPDATE topic_hot_scores thsOrig
      SET
        recent_likes = COALESCE(new_values.likes_count, 0),
        recent_posters = COALESCE(new_values.unique_participants, 0),
        recent_first_bumped_at = COALESCE(new_values.first_bumped_at, ths.recent_first_bumped_at)
      FROM
        topic_hot_scores ths
        LEFT OUTER JOIN
        (
          SELECT
            t.id AS topic_id,
            COUNT(DISTINCT p.user_id) AS unique_participants,
            (
              SELECT COUNT(*)
              FROM post_actions pa
              JOIN posts p2 ON p2.id = pa.post_id
              WHERE p2.topic_id = t.id
                AND pa.post_action_type_id = 2 -- action_type for 'like'
                AND pa.created_at >= :recent_cutoff
                AND pa.deleted_at IS NULL
            ) AS likes_count,
            MIN(p.created_at) AS first_bumped_at
          FROM
            topics t
          JOIN
            posts p ON t.id = p.topic_id
          WHERE
            p.created_at >= :recent_cutoff
            AND t.archetype <> :private_message
            AND t.deleted_at IS NULL
            AND p.deleted_at IS NULL
            AND t.created_at <= :now
            AND t.bumped_at >= :recent_cutoff
            AND p.created_at < :now
          GROUP BY
            t.id
        ) AS new_values
        ON ths.topic_id = new_values.topic_id
      WHERE thsOrig.topic_id = ths.topic_id
    SQL

    # Step 3: recompute the score for the `max` rows that currently have the
    # highest score (ties broken by the most recent recent_first_bumped_at).
    # Freshly inserted rows start at 0.0 and so compete for this batch on the
    # tie-break alone.
    DB.exec(<<~SQL, args)
      UPDATE topic_hot_scores ths
      SET score = topics.like_count /
        (EXTRACT(EPOCH FROM (:now - topics.created_at)) / 3600 + 2) ^ :gravity
        +
        CASE WHEN ths.recent_first_bumped_at IS NULL THEN 0 ELSE
          (ths.recent_likes + ths.recent_posters) /
          (EXTRACT(EPOCH FROM (:now - ths.recent_first_bumped_at)) / 3600 + 2) ^ :gravity
        END
        ,
        updated_at = :now
      FROM topics
      WHERE topics.id IN (
        SELECT topic_id FROM topic_hot_scores
        ORDER BY score DESC, recent_first_bumped_at DESC NULLS LAST
        LIMIT :max
      ) AND ths.topic_id = topics.id
    SQL
  end
end
# == Schema Information
#
# Table name: topic_hot_scores
#
# id :bigint not null, primary key
# topic_id :integer not null
# score :float default(0.0), not null
# recent_likes :integer default(0), not null
# recent_posters :integer default(0), not null
# recent_first_bumped_at :datetime
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_topic_hot_scores_on_score_and_topic_id (score,topic_id) UNIQUE
# index_topic_hot_scores_on_topic_id (topic_id) UNIQUE
#

View File

@ -2971,6 +2971,7 @@ en:
category: "There are no more %{category} topics."
tag: "There are no more %{tag} topics."
top: "There are no more top topics."
hot: "There are no more hot topics."
bookmarks: "There are no more bookmarked topics."
filter: "There are no more topics."
@ -4059,6 +4060,10 @@ en:
title: "Unseen"
lower_title: "unseen"
help: "new topics and topics you are currently watching or tracking with unread posts"
hot:
title: "Hot"
lower_title: "hot"
help: "top recent topics"
new:
lower_title_with_count:
one: "%{count} new"

View File

@ -1217,6 +1217,7 @@ Discourse::Application.routes.draw do
# RSS feeds for topic lists; each maps to a dedicated `list#*_feed` action.
get "latest.rss" => "list#latest_feed", :format => :rss
get "top.rss" => "list#top_feed", :format => :rss
get "hot.rss" => "list#hot_feed", :format => :rss
# One route per entry in Discourse.filters, dispatched to the `list` action of the same name.
Discourse.filters.each { |filter| get "#{filter}" => "list##{filter}" }

View File

@ -3111,3 +3111,12 @@ dashboard:
verbose_user_stat_count_logging:
hidden: true
default: false
# Gates the scheduled hot-score update job; off by default.
experimental_hot_topics:
hidden: true
default: false
# Exponent applied to the age-based denominator of the hot score (the "decay").
hot_topics_gravity:
hidden: true
default: 1.8
# Number of days of likes and posts counted as "recent" activity.
hot_topics_recent_days:
hidden: true
default: 7

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true
# Creates the topic_hot_scores table: one row per topic (enforced by the
# unique index on topic_id) holding the computed score and the recent
# activity counters it is derived from.
class CreateTopicHotScores < ActiveRecord::Migration[7.0]
  def change
    create_table(:topic_hot_scores) do |table|
      table.integer(:topic_id, null: false)
      table.float(:score, null: false, default: 0.0)
      table.integer(:recent_likes, null: false, default: 0)
      table.integer(:recent_posters, null: false, default: 0)
      # Nullable: no default is given for this column.
      table.datetime(:recent_first_bumped_at)
      table.timestamps
    end

    add_index(:topic_hot_scores, :topic_id, unique: true)
    add_index(:topic_hot_scores, [:score, :topic_id], unique: true)
  end
end

View File

@ -313,11 +313,11 @@ module Discourse
end
# Memoized list of topic list filter names, including :hot.
# Only one `||=` assignment is kept: with two in a row the first always wins,
# so the list containing :hot would never have been used.
def self.filters
  @filters ||= %i[latest unread new unseen top read posted bookmarks hot]
end
# Memoized list of the anonymous filter names, including :hot.
# Only one `||=` assignment is kept: with two in a row the first always wins,
# so the list containing :hot would never have been used.
def self.anonymous_filters
  @anonymous_filters ||= %i[latest top categories hot]
end
def self.top_menu_items

View File

@ -338,6 +338,14 @@ class TopicQuery
create_list(:bookmarks) { |l| l.where("tu.bookmarked") }
end
# Builds the :hot topic list: topics are inner-joined to topic_hot_scores
# (so topics without a score row are excluded) and ordered by score,
# highest first. The list is created with `unordered: true`.
def list_hot
  hot_scores_join = "JOIN topic_hot_scores on topics.id = topic_hot_scores.topic_id"

  create_list(:hot, unordered: true) do |scope|
    scope.joins(hot_scores_join).order("topic_hot_scores.score DESC")
  end
end
def list_top_for(period)
score_column = TopTopic.score_column_for_period(period)
create_list(:top, unordered: true) do |topics|

View File

@ -0,0 +1,58 @@
# frozen_string_literal: true
# Exercises TopicHotScore.update_scores against fabricated topics, posts and likes.
RSpec.describe TopicHotScore do
describe ".update_scores" do
fab!(:user)
fab!(:user2) { Fabricate(:user) }
it "can correctly update like counts and post counts and account for activity" do
freeze_time
# Pre-existing row with a stale recent_likes value; the last expectation
# checks that update_scores resets it to 0.
TopicHotScore.create!(topic_id: -1, score: 0.0, recent_likes: 99, recent_posters: 0)
# One old post plus three replies within the last few hours, written by two distinct users.
old_post = Fabricate(:post, created_at: 10.months.ago)
topic = old_post.topic
new_reply = Fabricate(:post, user: user, topic: topic, created_at: 4.hours.ago)
newer_reply = Fabricate(:post, user: user2, topic: topic, created_at: 1.hour.ago)
Fabricate(:post, user: user2, topic: topic, created_at: 1.minute.ago)
# This like is created with the clock moved a year back, so it is not
# expected to count as recent (recent_likes below is 2, not 3).
freeze_time(1.year.ago)
PostActionCreator.like(user, old_post)
# Move the clock forward again before creating the two recent likes.
freeze_time(1.year.from_now)
PostActionCreator.like(user2, new_reply)
PostActionCreator.like(user, newer_reply)
TopicHotScore.update_scores
hot_scoring = TopicHotScore.find_by(topic_id: topic.id)
expect(hot_scoring.recent_likes).to eq(2)
expect(hot_scoring.recent_posters).to eq(2)
expect(hot_scoring.recent_first_bumped_at).to eq_time(new_reply.created_at)
# NOTE(review): 1.020 is consistent with 3 / 2^1.8 + (2 + 2) / 6^1.8, which
# assumes the topic's like_count is 3, its created_at is the frozen "now",
# and gravity is 1.8 - confirm against the fabricators and settings.
expect(hot_scoring.score).to be_within(0.001).of(1.020)
expect(TopicHotScore.find_by(topic_id: -1).recent_likes).to eq(0)
end
it "can correctly set scores for topics" do
freeze_time
# No posts or likes are fabricated here, so only the
# like_count / (age in hours + 2) ^ gravity term should contribute.
topic1 = Fabricate(:topic, like_count: 3, created_at: 1.hour.ago)
topic2 = Fabricate(:topic, like_count: 10, created_at: 3.hour.ago)
TopicHotScore.update_scores
# With gravity 1.8: 3 / 3^1.8 ~= 0.415 and 10 / 5^1.8 ~= 0.551
expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.415)
expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.551)
# Two hours later both topics are older, so their scores decay.
freeze_time(2.hours.from_now)
TopicHotScore.update_scores
# With gravity 1.8: 3 / 5^1.8 ~= 0.165 and 10 / 7^1.8 ~= 0.301
expect(TopicHotScore.find_by(topic_id: topic1.id).score).to be_within(0.001).of(0.165)
expect(TopicHotScore.find_by(topic_id: topic2.id).score).to be_within(0.001).of(0.301)
end
end
end