FIX: improve performance of UserStat.ensure_consistency (#21044)
Optimize `UserStatpost_read_count` calculation. In addition, tests were updated to fail when code is not evaluated. Creation of PostTiming was updating `post_read_count`. Count it has to be reset to ensure that ensure_consitency correctly calculates result. Extracting users seen in the last hour to separate Common Table Expression reduces the amount of processed rows. Before ``` Update on user_stats (cost=267492.07..270822.95 rows=2900 width=174) (actual time=12606.121..12606.127 rows=0 loops=1) -> Hash Join (cost=267492.07..270822.95 rows=2900 width=174) (actual time=12561.814..12603.689 rows=10 loops=1) Hash Cond: (user_stats.user_id = x.user_id) Join Filter: (x.c <> user_stats.posts_read_count) Rows Removed by Join Filter: 67 -> Seq Scan on user_stats (cost=0.00..3125.34 rows=75534 width=134) (actual time=0.014..39.173 rows=75534 loops=1) -> Hash (cost=267455.80..267455.80 rows=2901 width=48) (actual time=12558.613..12558.617 rows=77 loops=1) Buckets: 4096 Batches: 1 Memory Usage: 39kB -> Subquery Scan on x (cost=267376.03..267455.80 rows=2901 width=48) (actual time=12168.601..12558.572 rows=77 loops=1) -> GroupAggregate (cost=267376.03..267426.79 rows=2901 width=12) (actual time=12168.595..12558.525 rows=77 loops=1) Group Key: pt.user_id -> Sort (cost=267376.03..267383.28 rows=2901 width=4) (actual time=12100.490..12352.106 rows=2072830 loops=1) Sort Key: pt.user_id Sort Method: external merge Disk: 28488kB -> Nested Loop (cost=1.28..267209.18 rows=2901 width=4) (actual time=0.040..11528.680 rows=2072830 loops=1) -> Nested Loop (cost=0.86..261390.02 rows=13159 width=8) (actual time=0.030..3492.887 rows=3581648 loops=1) -> Index Scan using index_users_on_last_seen_at on users u (cost=0.42..89.71 rows=28 width=4) (actual time=0.010..0.201 rows=78 loops=1) Index Cond: (last_seen_at > '2023-04-11 00:22:49.555537'::timestamp without time zone) -> Index Scan using index_post_timings_on_user_id on post_timings pt (cost=0.44..9287.60 rows=4455 width=8) (actual time=0.081..38.542 rows=45919 loops=78) Index Cond: (user_id = u.id) -> Index Scan using forum_threads_pkey on topics t (cost=0.42..0.44 rows=1 width=4) (actual time=0.002..0.002 rows=1 loops=3581648) Index Cond: (id = pt.topic_id) Filter: ((deleted_at IS NULL) AND ((archetype)::text = 'regular'::text)) Rows Removed by Filter: 0 Planning Time: 0.692 ms Execution Time: 12612.587 ms ``` After ``` Update on user_stats (cost=9473.60..12804.30 rows=2828 width=174) (actual time=677.724..677.729 rows=0 loops=1) -> Hash Join (cost=9473.60..12804.30 rows=2828 width=174) (actual time=672.536..677.706 rows=1 loops=1) Hash Cond: (user_stats.user_id = x.user_id) Join Filter: (x.c <> user_stats.posts_read_count) Rows Removed by Join Filter: 54 -> Seq Scan on user_stats (cost=0.00..3125.34 rows=75534 width=134) (actual time=0.012..23.977 rows=75534 loops=1) -> Hash (cost=9438.24..9438.24 rows=2829 width=48) (actual time=647.818..647.822 rows=55 loops=1) Buckets: 4096 Batches: 1 Memory Usage: 37kB -> Subquery Scan on x (cost=9381.66..9438.24 rows=2829 width=48) (actual time=647.409..647.805 rows=55 loops=1) -> HashAggregate (cost=9381.66..9409.95 rows=2829 width=12) (actual time=647.403..647.786 rows=55 loops=1) Group Key: pt.user_id Batches: 1 Memory Usage: 121kB -> Nested Loop (cost=1.86..9367.51 rows=2829 width=4) (actual time=0.056..625.245 rows=120022 loops=1) -> Nested Loop (cost=1.44..3692.96 rows=12832 width=8) (actual time=0.047..171.754 rows=217440 loops=1) -> Nested Loop (cost=1.00..254.63 rows=25 width=12) (actual time=0.030..1.407 rows=56 loops=1) Join Filter: (u.id = user_stats_1.user_id) -> Nested Loop (cost=0.71..243.08 rows=25 width=8) (actual time=0.018..1.207 rows=87 loops=1) -> Index Scan using index_users_on_last_seen_at on users u (cost=0.42..86.71 rows=27 width=4) (actual time=0.009..0.156 rows=87 loops=1) Index Cond: (last_seen_at > '2023-04-11 00:47:07.437568'::timestamp without time zone) -> Index Only Scan using user_stats_pkey on user_stats us (cost=0.29..5.79 rows=1 width=4) (actual time=0.011..0.011 rows=1 loops=87) Index Cond: (user_id = u.id) Heap Fetches: 87 -> Index Scan using user_stats_pkey on user_stats user_stats_1 (cost=0.29..0.45 rows=1 width=4) (actual time=0.002..0.002 rows=1 loops=87) Index Cond: (user_id = us.user_id) Filter: (posts_read_count < 10000) Rows Removed by Filter: 0 -> Index Scan using index_post_timings_on_user_id on post_timings pt (cost=0.44..92.98 rows=4455 width=8) (actual time=0.036..2.492 rows=3883 loops=56) Index Cond: (user_id = user_stats_1.user_id) -> Index Scan using forum_threads_pkey on topics t (cost=0.42..0.44 rows=1 width=4) (actual time=0.002..0.002 rows=1 loops=217440) Index Cond: (id = pt.topic_id) Filter: ((deleted_at IS NULL) AND ((archetype)::text = 'regular'::text)) Rows Removed by Filter: 0 Planning Time: 1.406 ms Execution Time: 677.817 ms ```
This commit is contained in:
parent
2809d7ba8e
commit
63a0466548
|
@ -175,19 +175,23 @@ class UserStat < ActiveRecord::Base
|
|||
|
||||
# Update denormalized posts_read_count
|
||||
DB.exec(<<~SQL, seen_at: last_seen)
|
||||
WITH filtered_users AS (
|
||||
SELECT id FROM users u
|
||||
JOIN user_stats ON user_id = u.id
|
||||
WHERE last_seen_at > :seen_at
|
||||
AND posts_read_count < 10000
|
||||
)
|
||||
UPDATE user_stats SET posts_read_count = X.c
|
||||
FROM
|
||||
(SELECT pt.user_id,
|
||||
COUNT(*) AS c
|
||||
FROM users AS u
|
||||
JOIN post_timings AS pt ON pt.user_id = u.id
|
||||
JOIN topics t ON t.id = pt.topic_id
|
||||
WHERE u.last_seen_at > :seen_at AND
|
||||
t.archetype = 'regular' AND
|
||||
t.deleted_at IS NULL
|
||||
GROUP BY pt.user_id) AS X
|
||||
WHERE X.user_id = user_stats.user_id AND
|
||||
X.c <> posts_read_count
|
||||
FROM (SELECT pt.user_id, COUNT(*) as c
|
||||
FROM filtered_users AS u
|
||||
JOIN post_timings AS pt ON pt.user_id = u.id
|
||||
JOIN topics t ON t.id = pt.topic_id
|
||||
WHERE t.archetype = 'regular'
|
||||
AND t.deleted_at IS NULL
|
||||
GROUP BY pt.user_id
|
||||
) AS X
|
||||
WHERE X.user_id = user_stats.user_id
|
||||
AND X.c <> posts_read_count
|
||||
SQL
|
||||
end
|
||||
|
||||
|
|
|
@ -27,19 +27,18 @@ RSpec.describe UserStat do
|
|||
fab!(:topic) { Fabricate(:topic) }
|
||||
let!(:view) { TopicViewItem.add(topic.id, "127.0.0.1", user.id) }
|
||||
|
||||
before { user.update_column :last_seen_at, 1.second.ago }
|
||||
before do
|
||||
user.update_column :last_seen_at, 1.second.ago
|
||||
stat.update_column :topics_entered, 0
|
||||
end
|
||||
|
||||
it "adds one to the topics entered" do
|
||||
UserStat.update_view_counts
|
||||
stat.reload
|
||||
expect(stat.topics_entered).to eq(1)
|
||||
expect { UserStat.update_view_counts }.to change { stat.reload.topics_entered }.to 1
|
||||
end
|
||||
|
||||
it "won't record a second view as a different topic" do
|
||||
TopicViewItem.add(topic.id, "127.0.0.1", user.id)
|
||||
UserStat.update_view_counts
|
||||
stat.reload
|
||||
expect(stat.topics_entered).to eq(1)
|
||||
expect { UserStat.update_view_counts }.to change { stat.reload.topics_entered }.to 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -65,12 +64,13 @@ RSpec.describe UserStat do
|
|||
)
|
||||
end
|
||||
|
||||
before { user.update_column :last_seen_at, 1.second.ago }
|
||||
before do
|
||||
user.update_column :last_seen_at, 1.second.ago
|
||||
stat.update_column :posts_read_count, 0
|
||||
end
|
||||
|
||||
it "increases posts_read_count" do
|
||||
UserStat.update_view_counts
|
||||
stat.reload
|
||||
expect(stat.posts_read_count).to eq(1)
|
||||
expect { UserStat.update_view_counts }.to change { stat.reload.posts_read_count }.to 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue