PERF: calculate scores for topics/posts faster
Only look at topics that changed in last day for 15 minuted schedule Do a full recalc weekly, just in case
This commit is contained in:
parent
e4909c017c
commit
9551f4aeea
|
@ -9,8 +9,8 @@ module Jobs
|
|||
|
||||
def execute(args)
|
||||
# Update the average times
|
||||
Post.calculate_avg_time
|
||||
Topic.calculate_avg_time
|
||||
Post.calculate_avg_time(1.day.ago)
|
||||
Topic.calculate_avg_time(1.day.ago)
|
||||
|
||||
# Feature topics in categories
|
||||
CategoryFeaturedTopic.feature_topics
|
||||
|
@ -19,8 +19,7 @@ module Jobs
|
|||
UserStat.update_view_counts
|
||||
|
||||
# Update the scores of posts
|
||||
ScoreCalculator.new.calculate
|
||||
|
||||
ScoreCalculator.new.calculate(1.day.ago)
|
||||
|
||||
# Automatically close stuff that we missed
|
||||
Topic.auto_close
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
require_dependency 'score_calculator'
|
||||
|
||||
module Jobs
|
||||
|
||||
# This job will run on a regular basis to update statistics and denormalized data.
|
||||
# If it does not run, the site will not function properly.
|
||||
class Weekly < Jobs::Scheduled
|
||||
every 1.week
|
||||
|
||||
def execute(args)
|
||||
Post.calculate_avg_time
|
||||
Topic.calculate_avg_time
|
||||
ScoreCalculator.new.calculate
|
||||
end
|
||||
end
|
||||
end
|
|
@ -311,9 +311,9 @@ class Post < ActiveRecord::Base
|
|||
|
||||
# This calculates the geometric mean of the post timings and stores it along with
|
||||
# each post.
|
||||
def self.calculate_avg_time
|
||||
def self.calculate_avg_time(min_topic_age=nil)
|
||||
retry_lock_error do
|
||||
exec_sql("UPDATE posts
|
||||
builder = SqlBuilder.new("UPDATE posts
|
||||
SET avg_time = (x.gmean / 1000)
|
||||
FROM (SELECT post_timings.topic_id,
|
||||
post_timings.post_number,
|
||||
|
@ -324,9 +324,18 @@ class Post < ActiveRecord::Base
|
|||
AND p2.topic_id = post_timings.topic_id
|
||||
AND p2.user_id <> post_timings.user_id
|
||||
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
|
||||
WHERE x.topic_id = posts.topic_id
|
||||
/*where*/")
|
||||
|
||||
builder.where("x.topic_id = posts.topic_id
|
||||
AND x.post_number = posts.post_number
|
||||
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)")
|
||||
|
||||
if min_topic_age
|
||||
builder.where("posts.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)",
|
||||
bumped_at: min_topic_age)
|
||||
end
|
||||
|
||||
builder.exec
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -362,15 +362,25 @@ class Topic < ActiveRecord::Base
|
|||
end
|
||||
|
||||
# This calculates the geometric mean of the posts and stores it with the topic
|
||||
def self.calculate_avg_time
|
||||
exec_sql("UPDATE topics
|
||||
def self.calculate_avg_time(min_topic_age=nil)
|
||||
builder = SqlBuilder.new("UPDATE topics
|
||||
SET avg_time = x.gmean
|
||||
FROM (SELECT topic_id,
|
||||
round(exp(avg(ln(avg_time)))) AS gmean
|
||||
FROM posts
|
||||
WHERE avg_time > 0 AND avg_time IS NOT NULL
|
||||
GROUP BY topic_id) AS x
|
||||
WHERE x.topic_id = topics.id AND (topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
|
||||
/*where*/")
|
||||
|
||||
builder.where("x.topic_id = topics.id AND
|
||||
(topics.avg_time <> x.gmean OR topics.avg_time IS NULL)")
|
||||
|
||||
if min_topic_age
|
||||
builder.where("topics.bumped_at > :bumped_at",
|
||||
bumped_at: min_topic_age)
|
||||
end
|
||||
|
||||
builder.exec
|
||||
end
|
||||
|
||||
def changed_to_category(cat)
|
||||
|
|
|
@ -16,22 +16,60 @@ class ScoreCalculator
|
|||
end
|
||||
|
||||
# Calculate the score for all posts based on the weightings
|
||||
def calculate
|
||||
def calculate(min_topic_age=nil)
|
||||
|
||||
# First update the scores of the posts
|
||||
exec_sql(post_score_sql, @weightings)
|
||||
update_posts_score(min_topic_age)
|
||||
|
||||
# Update the percent rankings of the posts
|
||||
exec_sql("UPDATE posts SET percent_rank = x.percent_rank
|
||||
update_posts_rank(min_topic_age)
|
||||
|
||||
update_topics_rank(min_topic_age)
|
||||
|
||||
update_topics_percent_rank(min_topic_age)
|
||||
|
||||
end
|
||||
|
||||
|
||||
private
|
||||
|
||||
def update_posts_score(min_topic_age)
|
||||
components = []
|
||||
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
|
||||
components = components.join(" + ")
|
||||
|
||||
builder = SqlBuilder.new(
|
||||
"UPDATE posts SET score = x.score
|
||||
FROM (SELECT id, #{components} as score FROM posts) AS x
|
||||
/*where*/"
|
||||
|
||||
)
|
||||
|
||||
builder.where("x.id = posts.id
|
||||
AND (posts.score IS NULL OR x.score <> posts.score)", @weightings)
|
||||
|
||||
filter_topics(builder, min_topic_age)
|
||||
|
||||
builder.exec
|
||||
end
|
||||
|
||||
def update_posts_rank(min_topic_age)
|
||||
|
||||
builder = SqlBuilder.new("UPDATE posts SET percent_rank = x.percent_rank
|
||||
FROM (SELECT id, percent_rank()
|
||||
OVER (PARTITION BY topic_id ORDER BY SCORE DESC) as percent_rank
|
||||
FROM posts) AS x
|
||||
WHERE x.id = posts.id AND
|
||||
/*where*/")
|
||||
|
||||
builder.where("x.id = posts.id AND
|
||||
(posts.percent_rank IS NULL OR x.percent_rank <> posts.percent_rank)")
|
||||
|
||||
|
||||
# Update the topics
|
||||
exec_sql "UPDATE topics AS t
|
||||
filter_topics(builder, min_topic_age)
|
||||
|
||||
builder.exec
|
||||
end
|
||||
|
||||
def update_topics_rank(min_topic_age)
|
||||
builder = SqlBuilder.new("UPDATE topics AS t
|
||||
SET has_summary = (t.like_count >= :likes_required AND
|
||||
t.posts_count >= :posts_required AND
|
||||
x.max_score >= :score_required),
|
||||
|
@ -41,7 +79,9 @@ class ScoreCalculator
|
|||
AVG(p.score) AS avg_score
|
||||
FROM posts AS p
|
||||
GROUP BY p.topic_id) AS x
|
||||
WHERE x.topic_id = t.id AND
|
||||
/*where*/")
|
||||
|
||||
builder.where("x.topic_id = t.id AND
|
||||
(
|
||||
(t.score <> x.avg_score OR t.score IS NULL) OR
|
||||
(t.has_summary IS NULL OR t.has_summary <> (
|
||||
|
@ -53,31 +93,45 @@ class ScoreCalculator
|
|||
",
|
||||
likes_required: SiteSetting.summary_likes_required,
|
||||
posts_required: SiteSetting.summary_posts_required,
|
||||
score_required: SiteSetting.summary_score_threshold
|
||||
score_required: SiteSetting.summary_score_threshold)
|
||||
|
||||
# Update percentage rank of topics
|
||||
exec_sql("UPDATE topics SET percent_rank = x.percent_rank
|
||||
if min_topic_age
|
||||
builder.where("t.bumped_at > :bumped_at ",
|
||||
bumped_at: min_topic_age)
|
||||
end
|
||||
|
||||
builder.exec
|
||||
end
|
||||
|
||||
def update_topics_percent_rank(min_topic_age)
|
||||
|
||||
builder = SqlBuilder.new("UPDATE topics SET percent_rank = x.percent_rank
|
||||
FROM (SELECT id, percent_rank()
|
||||
OVER (ORDER BY SCORE DESC) as percent_rank
|
||||
FROM topics) AS x
|
||||
WHERE x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
|
||||
/*where*/")
|
||||
|
||||
builder.where("x.id = topics.id AND (topics.percent_rank <> x.percent_rank OR topics.percent_rank IS NULL)")
|
||||
|
||||
|
||||
if min_topic_age
|
||||
builder.where("topics.bumped_at > :bumped_at ",
|
||||
bumped_at: min_topic_age)
|
||||
end
|
||||
|
||||
|
||||
builder.exec
|
||||
end
|
||||
|
||||
|
||||
private
|
||||
|
||||
def exec_sql(sql, params=nil)
|
||||
ActiveRecord::Base.exec_sql(sql, params)
|
||||
def filter_topics(builder, min_topic_age)
|
||||
if min_topic_age
|
||||
builder.where('posts.topic_id IN
|
||||
(SELECT id FROM topics WHERE bumped_at > :bumped_at)',
|
||||
bumped_at: min_topic_age)
|
||||
end
|
||||
|
||||
# Generate a SQL statement to update the scores of all posts
|
||||
def post_score_sql
|
||||
components = []
|
||||
@weightings.keys.each { |k| components << "COALESCE(#{k.to_s}, 0) * :#{k.to_s}" }
|
||||
components = components.join(" + ")
|
||||
builder
|
||||
end
|
||||
|
||||
"UPDATE posts SET score = x.score
|
||||
FROM (SELECT id, #{components} as score FROM posts) AS x
|
||||
WHERE x.id = posts.id AND (posts.score IS NULL OR x.score <> posts.score)"
|
||||
end
|
||||
end
|
||||
|
|
|
@ -787,4 +787,12 @@ describe Post do
|
|||
end
|
||||
end
|
||||
|
||||
describe "calculate_avg_time" do
|
||||
|
||||
it "should not crash" do
|
||||
Post.calculate_avg_time
|
||||
Post.calculate_avg_time(1.day.ago)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -1318,4 +1318,11 @@ describe Topic do
|
|||
end
|
||||
|
||||
end
|
||||
|
||||
describe "calculate_avg_time" do
|
||||
it "does not explode" do
|
||||
Topic.calculate_avg_time
|
||||
Topic.calculate_avg_time(1.day.ago)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue