PERF: Keep track of when a user's first unread is

This optimisation avoids large scans joining the topics table with the
topic_users table.

Previously, when a user carried a lot of read state we would have to join
the entire read state with the topics table. This operation would slow down
the home page and every topic page. The more read state you accumulated,
the larger the impact.

The optimisation helps people who clean up their unread list; however, if
you carry unread state from years ago it will only have minimal impact.
This commit is contained in:
Sam Saffron 2019-04-05 12:44:36 +11:00
parent d299197392
commit 5f896ae8f7
7 changed files with 157 additions and 1 deletions

View File

@ -80,6 +80,10 @@ class PostTiming < ActiveRecord::Base
highest_seen_post_number: last_read,
last_read_post_number: last_read
)
if !topic.private_message?
set_minimum_first_unread!(user_id: user.id, date: topic.updated_at)
end
end
end
@ -92,9 +96,24 @@ class PostTiming < ActiveRecord::Base
TopicUser
.where('user_id = ? and topic_id in (?)', user_id, topic_ids)
.delete_all
date = Topic.listable_topics.where(id: topic_ids).minimum(:updated_at)
if date
set_minimum_first_unread!(user_id: user_id, date: date)
end
end
end
# Lower a user's first_unread_at watermark to +date+.
#
# The watermark only ever moves backwards here: rows are touched solely
# when the stored timestamp is newer than +date+, so calling this with an
# older date than the current watermark is a no-op.
def self.set_minimum_first_unread!(user_id:, date:)
  sql = <<~SQL
    UPDATE user_stats
    SET first_unread_at = :date
    WHERE first_unread_at > :date AND
    user_id = :user_id
  SQL
  DB.exec(sql, user_id: user_id, date: date)
end
MAX_READ_TIME_PER_BATCH = 60 * 1000.0
def self.process_timings(current_user, topic_id, topic_time, timings, opts = {})

View File

@ -172,7 +172,7 @@ class TopicTrackingState
#
sql = report_raw_sql(topic_id: topic_id, skip_unread: true, skip_order: true, staff: user.staff?)
sql << "\nUNION ALL\n\n"
sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?)
sql << report_raw_sql(topic_id: topic_id, skip_new: true, skip_order: true, staff: user.staff?, filter_old_unread: true)
DB.query(
sql,
@ -195,6 +195,13 @@ class TopicTrackingState
.gsub("-999", ":user_id")
end
filter_old_unread =
if opts && opts[:filter_old_unread]
" topics.updated_at >= us.first_unread_at AND "
else
""
end
new =
if opts && opts[:skip_new]
"1=0"
@ -221,6 +228,7 @@ class TopicTrackingState
JOIN categories c ON c.id = topics.category_id
LEFT JOIN topic_users tu ON tu.topic_id = topics.id AND tu.user_id = u.id
WHERE u.id = :user_id AND
#{filter_old_unread}
topics.archetype <> 'private_message' AND
((#{unread}) OR (#{new})) AND
(topics.visible OR u.admin OR u.moderator) AND

View File

@ -7,6 +7,77 @@ class UserStat < ActiveRecord::Base
# Runs the user-stat consistency fixups in order: bounce scores, view
# counts, then the first-unread watermark.
#
# @param last_seen [Time] cutoff passed to the per-user recalculations;
#   only users seen after this point are recomputed.
def self.ensure_consistency!(last_seen = 1.hour.ago)
  self.reset_bounce_scores
  self.update_view_counts(last_seen)
  self.update_first_unread(last_seen)
end
# Bulk-recalculates user_stats.first_unread_at for recently active users.
#
# For each of the most recently seen users (last_seen_at > :min_date,
# newest first, capped at :limit -- the same capped user set appears in
# both subqueries), inner subquery X computes the minimum
# topics.updated_at across topics the user still has unread:
#   * non-private-message, not deleted, and visible (or user is staff)
#   * tu.last_read_post_number below the highest post number (the
#     staff-aware column for admins/moderators)
#   * COALESCE(tu.notification_level, 1) >= 2
#   * category not read_restricted, or user is admin / in a group granted
#     access via category_groups
#   * category not muted (no category_users row with notification_level 0
#     and NULL last_read_post_number)
# Outer subquery Y LEFT JOINs that minimum onto the capped user set and
# the UPDATE writes it into user_stats.
#
# NOTE(review): a user in Y with no qualifying unread topic yields a NULL
# min_date; if first_unread_at carries a NOT NULL constraint (as the
# accompanying migration suggests) this UPDATE could raise -- verify.
def self.update_first_unread(last_seen, limit: 10_000)
# Named binds: :min_date is the activity cutoff, :limit caps the batch size.
DB.exec(<<~SQL, min_date: last_seen, limit: limit)
UPDATE user_stats us
SET first_unread_at = Y.min_date
FROM (
SELECT u1.id user_id,
X.min min_date
FROM users u1
LEFT JOIN
(SELECT u.id AS user_id,
min(topics.updated_at) min
FROM users u
LEFT JOIN topic_users tu ON tu.user_id = u.id
LEFT JOIN topics ON tu.topic_id = topics.id
JOIN user_stats AS us ON us.user_id = u.id
JOIN user_options AS uo ON uo.user_id = u.id
JOIN categories c ON c.id = topics.category_id
WHERE u.id IN (
SELECT id
FROM users
WHERE last_seen_at IS NOT NULL
AND last_seen_at > :min_date
ORDER BY last_seen_at DESC
LIMIT :limit
)
AND topics.archetype <> 'private_message'
AND (("topics"."deleted_at" IS NULL
AND tu.last_read_post_number < CASE
WHEN u.admin
OR u.moderator THEN topics.highest_staff_post_number
ELSE topics.highest_post_number
END
AND COALESCE(tu.notification_level, 1) >= 2)
OR (1=0))
AND (topics.visible
OR u.admin
OR u.moderator)
AND topics.deleted_at IS NULL
AND (NOT c.read_restricted
OR u.admin
OR category_id IN
(SELECT c2.id
FROM categories c2
JOIN category_groups cg ON cg.category_id = c2.id
JOIN group_users gu ON gu.user_id = u.id
AND cg.group_id = gu.group_id
WHERE c2.read_restricted ))
AND NOT EXISTS
(SELECT 1
FROM category_users cu
WHERE last_read_post_number IS NULL
AND cu.user_id = u.id
AND cu.category_id = topics.category_id
AND cu.notification_level = 0)
GROUP BY u.id,
u.username) AS X ON X.user_id = u1.id
WHERE u1.id IN
(
SELECT id
FROM users
WHERE last_seen_at IS NOT NULL
AND last_seen_at > :min_date
ORDER BY last_seen_at DESC
LIMIT :limit
)
) Y
WHERE Y.user_id = us.user_id
SQL
end
def self.reset_bounce_scores
@ -133,4 +204,5 @@ end
# flags_agreed :integer default(0), not null
# flags_disagreed :integer default(0), not null
# flags_ignored :integer default(0), not null
# first_unread_at :datetime
#

View File

@ -0,0 +1,30 @@
# Adds user_stats.first_unread_at, the watermark used to avoid scanning a
# user's entire read state when computing unread topics, and a covering
# index on topics to support ranged updated_at lookups.
class AddFirstUnreadAtToUserStats < ActiveRecord::Migration[5.2]
  # CREATE INDEX CONCURRENTLY cannot run inside a transaction, so this
  # migration runs unwrapped and must be safe to rerun after a partial
  # failure.
  disable_ddl_transaction!

  def up
    # so we can rerun this if the index creation fails out of ddl
    if !column_exists?(:user_stats, :first_unread_at)
      add_column :user_stats, :first_unread_at, :datetime, null: false, default: -> { 'CURRENT_TIMESTAMP' }
    end

    # Backfill: treat everything since account creation as potentially
    # unread; the read-state code will only ever move the watermark forward.
    execute <<~SQL
      UPDATE user_stats us
      SET first_unread_at = u.created_at
      FROM users u
      WHERE u.id = us.user_id
    SQL

    # A failed CREATE INDEX CONCURRENTLY leaves an INVALID index behind in
    # Postgres, which would make a rerun fail on the duplicate name --
    # drop any leftover before creating.
    execute "DROP INDEX IF EXISTS index_topics_on_updated_at_public"

    # this is quite a big index to carry, but we need it to optimise home page initial load
    # by covering all these columns we are able to quickly retrieve the set of topics that were
    # updated in the last N days. We perform a ranged lookup and selectivity may vary a lot
    add_index :topics,
      [:updated_at, :visible, :highest_staff_post_number, :highest_post_number, :category_id, :created_at, :id],
      algorithm: :concurrently,
      name: 'index_topics_on_updated_at_public',
      where: "(topics.archetype <> 'private_message') AND (topics.deleted_at IS NULL)"
  end

  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

View File

@ -469,6 +469,10 @@ class TopicQuery
staff: @user&.staff?)
.order('CASE WHEN topics.user_id = tu.user_id THEN 1 ELSE 2 END')
if @user
result = result.where("topics.updated_at >= (SELECT first_unread_at FROM user_stats WHERE user_id = ?)", @user.id)
end
self.class.results_filter_callbacks.each do |filter_callback|
result = filter_callback.call(:unread, result, @user, options)
end

View File

@ -72,6 +72,22 @@ describe UserStat do
end
end
describe 'ensure consistency!' do
  it 'can update first unread' do
    first_post = create_post
    user = first_post.user

    # A reply ten minutes later leaves the original author with unread
    # state in that topic.
    freeze_time 10.minutes.from_now
    create_post(topic_id: first_post.topic_id)
    user.update!(last_seen_at: Time.now)

    UserStat.ensure_consistency!

    expect(user.user_stat.reload.first_unread_at).to eq_time(Time.now)
  end
end
describe 'update_time_read!' do
let(:user) { Fabricate(:user) }
let(:stat) { user.user_stat }

View File

@ -723,6 +723,8 @@ RSpec.describe TopicsController do
context 'for last post only' do
it 'should allow you to retain topic timing but remove last post only' do
freeze_time
post1 = create_post
topic = post1.topic
@ -731,6 +733,8 @@ RSpec.describe TopicsController do
PostTiming.create!(topic: topic, user: user, post_number: 1, msecs: 100)
PostTiming.create!(topic: topic, user: user, post_number: 2, msecs: 100)
user.user_stat.update!(first_unread_at: Time.now + 1.week)
TopicUser.create!(
topic: topic,
user: user,
@ -747,6 +751,9 @@ RSpec.describe TopicsController do
expect(TopicUser.where(topic: topic, user: user, last_read_post_number: 1, highest_seen_post_number: 1).exists?).to eq(true)
user.user_stat.reload
expect(user.user_stat.first_unread_at).to eq_time(topic.updated_at)
PostDestroyer.new(Fabricate(:admin), post2).destroy
delete "/t/#{topic.id}/timings.json?last=1"