PERF: Improve performance of post_search_data migration

Very large batches can take an enormous amount of time due to churn.

Limiting each batch to 200k rows gives us a far better chance of finishing
the job without timing out or deadlocking.
This commit is contained in:
Sam Saffron 2020-08-20 08:44:53 +10:00
parent 8348a41124
commit d2c504ea86
No known key found for this signature in database
GPG Key ID: B9606168D2FFD9F5

View File

@@ -5,23 +5,32 @@ class UpdatePrivateMessageOnPostSearchData < ActiveRecord::Migration[6.0]
disable_ddl_transaction! disable_ddl_transaction!
def update_private_message_flag def update_private_message_flag
execute <<~SQL
sql = <<~SQL
UPDATE post_search_data UPDATE post_search_data
SET private_message = true SET private_message = X.private_message
FROM posts FROM
INNER JOIN topics ON topics.id = posts.topic_id AND topics.archetype = 'private_message' (
WHERE posts.id = post_search_data.post_id AND SELECT post_id,
(private_message IS NULL or private_message = false) CASE WHEN t.archetype = 'private_message' THEN TRUE ELSE FALSE END private_message
FROM posts p
JOIN post_search_data pd ON pd.post_id = p.id
JOIN topics t ON t.id = p.topic_id
WHERE pd.private_message IS NULL OR
pd.private_message <> CASE WHEN t.archetype = 'private_message' THEN TRUE ELSE FALSE END
LIMIT 200000
) X
WHERE X.post_id = post_search_data.post_id
SQL SQL
execute <<~SQL while true
UPDATE post_search_data count = execute(sql).cmd_tuples
SET private_message = false if count == 0
FROM posts break
INNER JOIN topics ON topics.id = posts.topic_id AND topics.archetype <> 'private_message' else
WHERE posts.id = post_search_data.post_id AND puts "Migrated batch of #{count} on post_search_date to new schema"
(private_message IS NULL or private_message = true) end
SQL end
end end
def up def up