From f8eddd40ade5a62cd5f9349b71a2d1b60e3c7d2e Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Mon, 6 May 2019 15:58:49 +1000 Subject: [PATCH] PERF: remove avg_time calculations and regular jobs from posts and topics After careful analysis of large data-sets it became apparent that avg_time had no impact whatsoever on "best of" topic scoring. Calculating avg_time was a very costly operation especially on large databases. We have some longer term plans of introducing other weighting that is read time based into our scoring for "best of" and "top" topics, but in the interim to stop a large amount of work that is not achieving any value we are removing the jobs. Column removal will follow once we decide on a new replacement metric. --- app/jobs/scheduled/calculate_avg_time.rb | 14 ----- app/jobs/scheduled/weekly.rb | 2 - app/models/post.rb | 31 --------- app/models/topic.rb | 25 -------- app/serializers/post_serializer.rb | 1 - lib/score_calculator.rb | 1 - spec/models/post_spec.rb | 8 --- spec/models/post_timing_spec.rb | 63 ------------------- spec/models/topic_spec.rb | 7 --- test/javascripts/fixtures/post.js.es6 | 3 - .../fixtures/search-fixtures.js.es6 | 5 -- test/javascripts/fixtures/topic.js.es6 | 38 ----------- 12 files changed, 198 deletions(-) delete mode 100644 app/jobs/scheduled/calculate_avg_time.rb diff --git a/app/jobs/scheduled/calculate_avg_time.rb b/app/jobs/scheduled/calculate_avg_time.rb deleted file mode 100644 index c0bc9387e4d..00000000000 --- a/app/jobs/scheduled/calculate_avg_time.rb +++ /dev/null @@ -1,14 +0,0 @@ -module Jobs - class CalculateAvgTime < Jobs::Scheduled - every 1.day - - # PERF: these calculations can become exceedingly expnsive - # they run a huge gemoetric mean and are hard to optimise - # defer to only run once a day - def execute(args) - # Update the average times - Post.calculate_avg_time(2.days.ago) - Topic.calculate_avg_time(2.days.ago) - end - end -end diff --git a/app/jobs/scheduled/weekly.rb b/app/jobs/scheduled/weekly.rb index 72fdb0127c2..d76b9f32f92 100644 --- a/app/jobs/scheduled/weekly.rb +++ b/app/jobs/scheduled/weekly.rb @@ -8,8 +8,6 @@ module Jobs every 1.week def execute(args) - Post.calculate_avg_time - Topic.calculate_avg_time ScoreCalculator.new.calculate MiniScheduler::Stat.purge_old Draft.cleanup! diff --git a/app/models/post.rb b/app/models/post.rb index 903400c0049..f51146ca8c3 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -674,37 +674,6 @@ class Post < ActiveRecord::Base end - # This calculates the geometric mean of the post timings and stores it along with - # each post. - def self.calculate_avg_time(min_topic_age = nil) - retry_lock_error do - builder = DB.build("UPDATE posts - SET avg_time = (x.gmean / 1000) - FROM (SELECT post_timings.topic_id, - post_timings.post_number, - round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END))) AS gmean - FROM post_timings - INNER JOIN posts AS p2 - ON p2.post_number = post_timings.post_number - AND p2.topic_id = post_timings.topic_id - AND p2.user_id <> post_timings.user_id - /*where2*/ - GROUP BY post_timings.topic_id, post_timings.post_number) AS x - /*where*/") - - builder.where("x.topic_id = posts.topic_id - AND x.post_number = posts.post_number - AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS NULL)") - - if min_topic_age - builder.where2("p2.topic_id IN (SELECT id FROM topics where bumped_at > :bumped_at)", - bumped_at: min_topic_age) - end - - builder.exec - end - end - before_save do self.last_editor_id ||= user_id diff --git a/app/models/topic.rb b/app/models/topic.rb index bb5a0835ad1..b8a0b187976 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -671,31 +671,6 @@ class Topic < ActiveRecord::Base SQL end - # This calculates the geometric mean of the posts and stores it with the topic - def self.calculate_avg_time(min_topic_age = nil) - builder = DB.build <<~SQL - UPDATE topics - SET avg_time = x.gmean - FROM (SELECT topic_id, - round(exp(avg(ln(avg_time)))) AS gmean - FROM posts - WHERE avg_time > 0 AND avg_time IS NOT NULL - GROUP BY topic_id) AS x - /*where*/ - SQL - - builder.where <<~SQL - x.topic_id = topics.id AND - (topics.avg_time <> x.gmean OR topics.avg_time IS NULL) - SQL - - if min_topic_age - builder.where("topics.bumped_at > :bumped_at", bumped_at: min_topic_age) - end - - builder.exec - end - def changed_to_category(new_category) return true if new_category.blank? || Category.exists?(topic_id: id) return false if new_category.id == SiteSetting.uncategorized_category_id && !SiteSetting.allow_uncategorized_topics diff --git a/app/serializers/post_serializer.rb b/app/serializers/post_serializer.rb index 8614d0fa5fb..2e059ac3b11 100644 --- a/app/serializers/post_serializer.rb +++ b/app/serializers/post_serializer.rb @@ -23,7 +23,6 @@ class PostSerializer < BasicPostSerializer :reply_count, :reply_to_post_number, :quote_count, - :avg_time, :incoming_link_count, :reads, :score, diff --git a/lib/score_calculator.rb b/lib/score_calculator.rb index 69c8173d9b4..cdb70afd355 100644 --- a/lib/score_calculator.rb +++ b/lib/score_calculator.rb @@ -6,7 +6,6 @@ class ScoreCalculator like_score: 15, incoming_link_count: 5, bookmark_count: 2, - avg_time: 0.05, reads: 0.2 } end diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index 84629708bc8..be6cfeff408 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -1023,14 +1023,6 @@ describe Post do end end - describe "calculate_avg_time" do - - it "should not crash" do - Post.calculate_avg_time - Post.calculate_avg_time(1.day.ago) - end - end - describe "has_host_spam" do let(:raw) { "hello from my site http://www.example.net http://#{GlobalSetting.hostname} http://#{RailsMultisite::ConnectionManagement.current_hostname}" } diff --git a/spec/models/post_timing_spec.rb b/spec/models/post_timing_spec.rb index b09a84881ea..dc52290f78b 100644 --- a/spec/models/post_timing_spec.rb +++ b/spec/models/post_timing_spec.rb @@ -162,69 +162,6 @@ describe PostTiming do end - describe 'avg times' do - - describe 'posts' do - it 'has no avg_time by default' do - expect(@post.avg_time).to be_blank - end - - it "doesn't change when we calculate the avg time for the post because there's no timings" do - Post.calculate_avg_time - @post.reload - expect(@post.avg_time).to be_blank - end - end - - describe 'topics' do - it 'has no avg_time by default' do - expect(@topic.avg_time).to be_blank - end - - it "doesn't change when we calculate the avg time for the post because there's no timings" do - Topic.calculate_avg_time - @topic.reload - expect(@topic.avg_time).to be_blank - end - end - - describe "it doesn't create an avg time for the same user" do - it 'something' do - PostTiming.record_timing(@timing_attrs.merge(user_id: @post.user_id)) - Post.calculate_avg_time - @post.reload - expect(@post.avg_time).to be_blank - end - - end - - describe 'with a timing for another user' do - before do - PostTiming.record_timing(@timing_attrs) - Post.calculate_avg_time - @post.reload - end - - it 'has a post avg_time from the timing' do - expect(@post.avg_time).to be_present - end - - describe 'forum topics' do - before do - Topic.calculate_avg_time - @topic.reload - end - - it 'has an avg_time from the timing' do - expect(@topic.avg_time).to be_present - end - - end - - end - - end - end end diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index 0180d94c48f..34ca73a16c4 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -2030,13 +2030,6 @@ describe Topic do end - describe "calculate_avg_time" do - it "does not explode" do - Topic.calculate_avg_time - Topic.calculate_avg_time(1.day.ago) - end - end - describe "expandable_first_post?" do let(:topic) { Fabricate.build(:topic) } diff --git a/test/javascripts/fixtures/post.js.es6 b/test/javascripts/fixtures/post.js.es6 index 28288e47394..736f4376d23 100644 --- a/test/javascripts/fixtures/post.js.es6 +++ b/test/javascripts/fixtures/post.js.es6 @@ -15,7 +15,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 25, incoming_link_count: 314, reads: 475, score: 1702.25, @@ -68,7 +67,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -118,7 +116,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, diff --git a/test/javascripts/fixtures/search-fixtures.js.es6 b/test/javascripts/fixtures/search-fixtures.js.es6 index 96a14ae24b5..60d179fa747 100644 --- a/test/javascripts/fixtures/search-fixtures.js.es6 +++ b/test/javascripts/fixtures/search-fixtures.js.es6 @@ -838,7 +838,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 24, incoming_link_count: 4422, reads: 327, score: 21978.4, @@ -928,7 +927,6 @@ export default { reply_count: 2, reply_to_post_number: null, quote_count: 0, - avg_time: 36, incoming_link_count: 4680, reads: 491, score: 23815.8, @@ -1019,7 +1017,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 36, incoming_link_count: 1483, reads: 274, score: 7985.4, @@ -1110,7 +1107,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 39, incoming_link_count: 1241, reads: 149, score: 6161.35, @@ -1200,7 +1196,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 22, incoming_link_count: 386, reads: 163, score: 1953.7, diff --git a/test/javascripts/fixtures/topic.js.es6 b/test/javascripts/fixtures/topic.js.es6 index 2d657aae380..0ced6085b41 100644 --- a/test/javascripts/fixtures/topic.js.es6 +++ b/test/javascripts/fixtures/topic.js.es6 @@ -20,7 +20,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: 25, incoming_link_count: 314, reads: 475, score: 1702.25, @@ -164,7 +163,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 27, incoming_link_count: 16, reads: 460, score: 308.35, @@ -268,7 +266,6 @@ export default { reply_count: 3, reply_to_post_number: null, quote_count: 0, - avg_time: 33, incoming_link_count: 5, reads: 449, score: 191.45, @@ -382,7 +379,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 20, incoming_link_count: 15, reads: 401, score: 291.2, @@ -486,7 +482,6 @@ export default { reply_count: 2, reply_to_post_number: 3, quote_count: 1, - avg_time: 22, incoming_link_count: 10, reads: 386, score: 213.3, @@ -592,7 +587,6 @@ export default { reply_count: 1, reply_to_post_number: 5, quote_count: 0, - avg_time: 17, incoming_link_count: 4, reads: 329, score: 106.65, @@ -708,7 +702,6 @@ export default { reply_count: 1, reply_to_post_number: 6, quote_count: 1, - avg_time: 16, incoming_link_count: 0, reads: 326, score: 86.0, @@ -812,7 +805,6 @@ export default { reply_count: 1, reply_to_post_number: 7, quote_count: 0, - avg_time: 11, incoming_link_count: 2, reads: 296, score: 89.75, @@ -922,7 +914,6 @@ export default { reply_count: 1, reply_to_post_number: 8, quote_count: 1, - avg_time: 10, incoming_link_count: 0, reads: 293, score: 79.1, @@ -1017,7 +1008,6 @@ export default { reply_count: 1, reply_to_post_number: 9, quote_count: 0, - avg_time: 7, incoming_link_count: 0, reads: 275, score: 75.35, @@ -1117,7 +1107,6 @@ export default { reply_count: 1, reply_to_post_number: 10, quote_count: 0, - avg_time: 7, incoming_link_count: 1, reads: 273, score: 79.95, @@ -1217,7 +1206,6 @@ export default { reply_count: 1, reply_to_post_number: 11, quote_count: 1, - avg_time: 7, incoming_link_count: 2, reads: 273, score: 84.95, @@ -1312,7 +1300,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 7, incoming_link_count: 11, reads: 290, score: 158.35, @@ -1407,7 +1394,6 @@ export default { reply_count: 1, reply_to_post_number: 12, quote_count: 1, - avg_time: 9, incoming_link_count: 0, reads: 283, score: 137.05, @@ -1511,7 +1497,6 @@ export default { reply_count: 1, reply_to_post_number: 14, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 260, score: 72.35, @@ -1606,7 +1591,6 @@ export default { reply_count: 1, reply_to_post_number: 15, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 254, score: 71.15, @@ -1701,7 +1685,6 @@ export default { reply_count: 2, reply_to_post_number: 16, quote_count: 1, - avg_time: 7, incoming_link_count: 0, reads: 255, score: 76.35, @@ -1796,7 +1779,6 @@ export default { reply_count: 2, reply_to_post_number: 17, quote_count: 0, - avg_time: 8, incoming_link_count: 0, reads: 264, score: 168.2, @@ -1896,7 +1878,6 @@ export default { reply_count: 0, reply_to_post_number: 18, quote_count: 0, - avg_time: 8, incoming_link_count: 1, reads: 241, score: 68.6, @@ -1996,7 +1977,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 9, incoming_link_count: 2, reads: 244, score: 74.25, @@ -3161,7 +3141,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 14, reads: 24, score: 224.6, @@ -3224,7 +3203,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 6, reads: 22, score: 34.2, @@ -3277,7 +3255,6 @@ export default { reply_count: 1, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 14, score: 7.2, @@ -3330,7 +3307,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 6, reads: 12, score: 31.6, @@ -3383,7 +3359,6 @@ export default { reply_count: 0, reply_to_post_number: 3, quote_count: 1, - avg_time: null, incoming_link_count: 0, reads: 11, score: 16.0, @@ -3735,7 +3710,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -3784,7 +3758,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -3833,7 +3806,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4028,7 +4000,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4081,7 +4052,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4299,7 +4269,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4348,7 +4317,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4397,7 +4365,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4594,7 +4561,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4643,7 +4609,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4692,7 +4657,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: null, incoming_link_count: 0, reads: 1, score: 0, @@ -4888,7 +4852,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 15, incoming_link_count: 0, reads: 2, score: 1.15, @@ -4951,7 +4914,6 @@ export default { reply_count: 0, reply_to_post_number: null, quote_count: 0, - avg_time: 16, incoming_link_count: 0, reads: 2, score: 1.2,