PERF: Speed up secure media and ACL sync rake tasks (#16849)

Incorporates learnings from /t/64227:

* Changes the code to set access control posts in the rake
  task to be an efficient UPDATE SQL query.
  The original version was timing out with 312017 post uploads,
  the new query took ~3s to run.
* Changes the code to mark uploads as secure/not secure in
  the rake task to be an efficient UPDATE SQL query rather than
  using UploadSecurity. This took a very long time previously,
  and now takes only a few seconds.
* Spread out ACL syncing for uploads into jobs with batches of
  100 uploads at a time, so they can be parallelized instead
  of having to wait ~1.25 seconds for each ACL to be changed
  in S3 serially.

One issue that still remains is post rebaking. Doing this serially
is painfully slow. We have a way to do this in sidekiq via PeriodicalUpdates
but this is limited by max_old_rebakes_per_15_minutes. It would
be better to fan this rebaking out into jobs like we did for the
ACL sync, but that should be done in another PR.
This commit is contained in:
Martin Brennan 2022-05-23 13:14:11 +10:00 committed by GitHub
parent fcc2e7ebbf
commit faf5b4d3e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 185 additions and 175 deletions

View File

@ -18,7 +18,18 @@ module Jobs
Rails.logger.warn("Syncing ACL for upload ids: #{args[:upload_ids].join(", ")}")
Upload.includes(:optimized_images).where(id: args[:upload_ids]).find_in_batches do |uploads|
uploads.each do |upload|
Discourse.store.update_upload_ACL(upload, optimized_images_preloaded: true)
begin
Discourse.store.update_upload_ACL(upload, optimized_images_preloaded: true)
rescue => err
Discourse.warn_exception(
err,
message: "Failed to update upload ACL",
env: {
upload_id: upload.id,
filename: upload.original_filename
}
)
end
end
end
Rails.logger.warn("Completed syncing ACL for upload ids in #{time.to_s}. IDs: #{args[:upload_ids].join(", ")}")

View File

@ -506,10 +506,14 @@ task "uploads:sync_s3_acls" => :environment do
exit 1
end
puts "CAUTION: This task may take a long time to complete!"
puts "CAUTION: This task may take a long time to complete! There are #{Upload.count} uploads to sync ACLs for."
puts ""
puts "-" * 30
puts "Uploads marked as secure will get a private ACL, and uploads marked as not secure will get a public ACL."
adjust_acls(Upload.find_each(batch_size: 100))
puts "Upload ACLs will be updated in Sidekiq jobs in batches of 100 at a time, check Sidekiq queues for SyncAclsForUploads for progress."
Upload.select(:id).find_in_batches(batch_size: 100) do |uploads|
adjust_acls(uploads.map(&:id))
end
puts "", "Upload ACL sync complete!"
end
end
@ -525,43 +529,30 @@ task "uploads:disable_secure_media" => :environment do
SiteSetting.secure_media = false
secure_uploads = Upload.includes(:posts).where(secure: true)
secure_uploads = Upload.joins(:post_uploads).where(secure: true)
secure_upload_count = secure_uploads.count
uploads_to_adjust_acl_for = []
posts_to_rebake = {}
i = 0
secure_uploads.find_each(batch_size: 20).each do |upload|
uploads_to_adjust_acl_for << upload
upload.posts.each do |post|
# don't want unnecessary double-ups
next if posts_to_rebake.key?(post.id)
posts_to_rebake[post.id] = post
end
i += 1
end
secure_upload_ids = secure_uploads.pluck(:id)
puts "", "Marking #{secure_upload_count} uploads as not secure.", ""
secure_uploads.update_all(secure: false)
secure_uploads.update_all(
secure: false,
security_last_changed_at: Time.zone.now,
security_last_changed_reason: "marked as not secure by disable_secure_media task"
)
adjust_acls(uploads_to_adjust_acl_for)
post_rebake_errors = rebake_upload_posts(posts_to_rebake)
post_ids_to_rebake = DB.query_single(
"SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)", secure_upload_ids
)
adjust_acls(secure_upload_ids)
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
log_rebake_errors(post_rebake_errors)
RakeHelpers.print_status_with_label("Rebaking and updating complete! ", i, secure_upload_count)
puts "", "Rebaking and uploading complete!", ""
end
puts "", "Secure media is now disabled!", ""
end
# Renamed to uploads:secure_upload_analyse_and_update
task "uploads:ensure_correct_acl" => :environment do
puts "This task has been deprecated, run uploads:secure_upload_analyse_and_update task instead."
exit 1
end
##
# Run this task whenever the secure_media or login_required
# settings are changed for a Discourse instance to update
@ -576,10 +567,8 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
end
puts "Analyzing security for uploads in #{db}...", ""
upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for = nil
all_upload_ids_changed, post_ids_to_rebake = nil
Upload.transaction do
mark_secure_in_loop_because_no_login_required = false
# If secure media is enabled we need to first set the access control post of
# all post uploads (even uploads that are linked to multiple posts). If the
# upload is not set to secure media then this has no other effect on the upload,
@ -589,73 +578,77 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
update_uploads_access_control_post
end
# Get all uploads in the database, including optimized images. Both media (images, videos,
# etc) along with attachments (pdfs, txt, etc.) must be loaded because all can be marked as
# secure based on site settings.
uploads_to_update = Upload.includes(:posts, :optimized_images).joins(:post_uploads)
puts "There are #{uploads_to_update.count} upload(s) that could be marked secure.", ""
# Simply mark all these uploads as secure if login_required because no anons will be able to access them
puts "", "Analysing which uploads need to be marked secure and be rebaked.", ""
if SiteSetting.login_required?
mark_secure_in_loop_because_no_login_required = false
# Simply mark all uploads linked to posts secure if login_required because no anons will be able to access them.
post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required
else
# If NOT login_required, then we have to go for the other slower flow, where in the loop
# we mark the upload secure based on UploadSecurity.should_be_secure?
mark_secure_in_loop_because_no_login_required = true
puts "Marking posts as secure in the next step because login_required is false."
end
puts "", "Analysing which of #{uploads_to_update.count} uploads need to be marked secure and be rebaked.", ""
upload_ids_to_mark_as_secure,
upload_ids_to_mark_as_not_secure,
posts_to_rebake,
uploads_to_adjust_acl_for = determine_upload_security_and_posts_to_rebake(
uploads_to_update, mark_secure_in_loop_because_no_login_required
)
if !SiteSetting.login_required?
update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure)
else
mark_all_as_secure_login_required(uploads_to_update)
# Otherwise only mark uploads linked to posts in secure categories or PMs as secure.
post_ids_to_rebake, all_upload_ids_changed = update_specific_upload_security_no_login_required
end
end
# Enqueue rebakes AFTER upload transaction complete, so there is no race condition
# between updating the DB and the rebakes occurring.
post_rebake_errors = rebake_upload_posts(posts_to_rebake)
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
log_rebake_errors(post_rebake_errors)
# Also do this AFTER upload transaction complete so we don't end up with any
# errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
# outliers at any time).
adjust_acls(uploads_to_adjust_acl_for)
adjust_acls(all_upload_ids_changed)
end
puts "", "", "Done!"
end
def adjust_acls(uploads_to_adjust_acl_for)
total_count = uploads_to_adjust_acl_for.respond_to?(:length) ? uploads_to_adjust_acl_for.length : uploads_to_adjust_acl_for.count
puts "", "Updating ACL for #{total_count} uploads.", ""
i = 0
uploads_to_adjust_acl_for.each do |upload|
RakeHelpers.print_status_with_label("Updating ACL for upload.......", i, total_count)
Discourse.store.update_upload_ACL(upload)
i += 1
def adjust_acls(upload_ids_to_adjust_acl_for)
jobs_to_create = (upload_ids_to_adjust_acl_for.count.to_f / 100.00).ceil
if jobs_to_create > 1
puts "Adjusting ACLs for #{upload_ids_to_adjust_acl_for} uploads. These will be batched across #{jobs_to_create} sync job(s)."
end
upload_ids_to_adjust_acl_for.each_slice(100) do |upload_ids|
Jobs.enqueue(:sync_acls_for_uploads, upload_ids: upload_ids)
end
if jobs_to_create > 1
puts "ACL batching complete. Keep an eye on the Sidekiq queue for progress."
end
RakeHelpers.print_status_with_label("Updating ACLs complete! ", i, total_count)
end
def mark_all_as_secure_login_required(uploads_to_update)
puts "Marking #{uploads_to_update.count} upload(s) as secure because login_required is true.", ""
uploads_to_update.update_all(
secure: true,
security_last_changed_at: Time.zone.now,
security_last_changed_reason: "upload security rake task all secure login required"
)
def mark_all_as_secure_login_required
post_upload_ids_marked_secure = DB.query_single(<<~SQL)
WITH upl AS (
SELECT DISTINCT ON (upload_id) upload_id
FROM post_uploads
INNER JOIN posts ON posts.id = post_uploads.post_id
INNER JOIN topics ON topics.id = posts.topic_id
)
UPDATE uploads
SET secure = true,
security_last_changed_reason = 'upload security rake task mark as secure',
security_last_changed_at = NOW()
FROM upl
WHERE uploads.id = upl.upload_id AND NOT uploads.secure
RETURNING uploads.id
SQL
puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.", ""
upload_ids_marked_not_secure = DB.query_single(<<~SQL, post_upload_ids_marked_secure)
UPDATE uploads
SET secure = false,
security_last_changed_reason = 'upload security rake task mark as not secure',
security_last_changed_at = NOW()
WHERE id NOT IN (?) AND uploads.secure
RETURNING uploads.id
SQL
puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.", ""
puts "Finished marking upload(s) as secure."
post_ids_to_rebake = DB.query_single(
"SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)", post_upload_ids_marked_secure
)
[post_ids_to_rebake, (post_upload_ids_marked_secure + upload_ids_marked_not_secure).uniq]
end
def log_rebake_errors(rebake_errors)
@ -666,45 +659,81 @@ def log_rebake_errors(rebake_errors)
end
end
def update_specific_upload_security_no_login_required(upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure)
if upload_ids_to_mark_as_secure.any?
puts "Marking #{upload_ids_to_mark_as_secure.length} uploads as secure because UploadSecurity determined them to be secure."
Upload.where(id: upload_ids_to_mark_as_secure).update_all(
secure: true,
security_last_changed_at: Time.zone.now,
security_last_changed_reason: "upload security rake task mark as secure"
def update_specific_upload_security_no_login_required
# A simplification of the rules found in UploadSecurity which is a lot faster than
# having to loop through records and use that class to check security.
post_upload_ids_marked_secure = DB.query_single(<<~SQL)
WITH upl AS (
SELECT DISTINCT ON (upload_id) upload_id
FROM post_uploads
INNER JOIN posts ON posts.id = post_uploads.post_id
INNER JOIN topics ON topics.id = posts.topic_id
LEFT JOIN categories ON categories.id = topics.category_id
WHERE (topics.category_id IS NOT NULL AND categories.read_restricted) OR
(topics.archetype = 'private_message')
)
end
if upload_ids_to_mark_as_not_secure.any?
puts "Marking #{upload_ids_to_mark_as_not_secure.length} uploads as not secure because UploadSecurity determined them to be not secure."
Upload.where(id: upload_ids_to_mark_as_not_secure).update_all(
secure: false,
security_last_changed_at: Time.zone.now,
security_last_changed_reason: "upload security rake task mark as not secure"
UPDATE uploads
SET secure = true,
security_last_changed_reason = 'upload security rake task mark as secure',
security_last_changed_at = NOW()
FROM upl
WHERE uploads.id = upl.upload_id AND NOT uploads.secure
RETURNING uploads.id
SQL
puts "Marked #{post_upload_ids_marked_secure.length} uploads as secure."
# Anything in a public category or a regular topic should not be secure.
post_upload_ids_marked_not_secure = DB.query_single(<<~SQL)
WITH upl AS (
SELECT DISTINCT ON (upload_id) upload_id
FROM post_uploads
INNER JOIN posts ON posts.id = post_uploads.post_id
INNER JOIN topics ON topics.id = posts.topic_id
LEFT JOIN categories ON categories.id = topics.category_id
WHERE (topics.archetype = 'regular' AND topics.category_id IS NOT NULL AND NOT categories.read_restricted) OR
(topics.archetype = 'regular' AND topics.category_id IS NULL)
)
end
puts "Finished updating upload security."
UPDATE uploads
SET secure = false,
security_last_changed_reason = 'upload security rake task mark as not secure',
security_last_changed_at = NOW()
FROM upl
WHERE uploads.id = upl.upload_id AND uploads.secure
RETURNING uploads.id
SQL
puts "Marked #{post_upload_ids_marked_not_secure.length} uploads as not secure."
# Everything else should not be secure!
upload_ids_changed = (post_upload_ids_marked_secure + post_upload_ids_marked_not_secure).uniq
upload_ids_marked_not_secure = DB.query_single(<<~SQL, upload_ids_changed)
UPDATE uploads
SET secure = false,
security_last_changed_reason = 'upload security rake task mark as not secure',
security_last_changed_at = NOW()
WHERE id NOT IN (?) AND uploads.secure
RETURNING uploads.id
SQL
puts "Finished updating upload security. Marked #{upload_ids_marked_not_secure.length} uploads not linked to posts as not secure."
all_upload_ids_changed = (upload_ids_changed + upload_ids_marked_not_secure).uniq
post_ids_to_rebake = DB.query_single("SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)", upload_ids_changed)
[post_ids_to_rebake, all_upload_ids_changed]
end
def update_uploads_access_control_post
access_control_post_updates = []
uploads_with_post_ids = DB.query(<<-SQL
SELECT upload_id, (
SELECT string_agg(CAST(post_uploads.post_id AS varchar), ',' ORDER BY post_uploads.id) as post_ids
FROM post_uploads
WHERE pu.upload_id = post_uploads.upload_id
) FROM post_uploads pu
DB.exec(<<~SQL)
WITH upl AS (
SELECT DISTINCT ON (upload_id) upload_id, post_id FROM post_uploads ORDER BY upload_id, post_id
)
UPDATE uploads
SET access_control_post_id = upl.post_id
FROM upl
WHERE uploads.id = upl.upload_id
SQL
)
uploads_with_post_ids.each do |row|
first_post_id = row.post_ids.split(",").first.to_i
access_control_post_updates << "UPDATE uploads SET access_control_post_id = #{first_post_id} WHERE id = #{row.upload_id};"
end
DB.exec(access_control_post_updates.join("\n"))
end
def rebake_upload_posts(posts_to_rebake)
posts_to_rebake = posts_to_rebake.values
def rebake_upload_posts(post_ids_to_rebake)
posts_to_rebake = Post.where(id: post_ids_to_rebake)
post_rebake_errors = []
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", ""
begin
@ -723,69 +752,6 @@ def rebake_upload_posts(posts_to_rebake)
post_rebake_errors
end
def determine_upload_security_and_posts_to_rebake(uploads_to_update, mark_secure_in_loop_because_no_login_required)
upload_ids_to_mark_as_secure = []
upload_ids_to_mark_as_not_secure = []
uploads_to_adjust_acl_for = []
posts_to_rebake = {}
# we do this to avoid a heavier post query, and to make sure we only
# get unique posts AND include deleted posts (unscoped)
unique_access_control_posts = {}
Post.unscoped.select(:id, :topic_id)
.includes(topic: :category)
.where(id: uploads_to_update.pluck(:access_control_post_id).uniq).find_each do |post|
unique_access_control_posts[post.id] = post
end
i = 0
uploads_to_update.find_each do |upload_to_update|
# fetch the post out of the already populated map to avoid n1s
upload_to_update.access_control_post = unique_access_control_posts[upload_to_update.access_control_post_id]
# we just need to determine the post security here so the ACL is set to the correct thing,
# because the update_upload_ACL method uses upload.secure?
original_update_secure_status = upload_to_update.secure
upload_to_update.secure = UploadSecurity.new(upload_to_update).should_be_secure?
# no point changing ACLs or rebaking or doing any such shenanigans
# when the secure status hasn't even changed!
if original_update_secure_status == upload_to_update.secure
i += 1
next
end
# we only want to update the acl later once the secure status
# has been saved in the DB; otherwise if there is a later failure
# we get stuck with an incorrect ACL in S3
uploads_to_adjust_acl_for << upload_to_update
RakeHelpers.print_status_with_label("Analysing which upload posts to rebake.....", i, uploads_to_update.count)
upload_to_update.posts.each do |post|
# don't want unnecessary double-ups
next if posts_to_rebake.key?(post.id)
posts_to_rebake[post.id] = post
end
# some uploads will be marked as not secure here.
# we need to address this with upload_ids_to_mark_as_not_secure
# e.g. turning off SiteSetting.login_required
if mark_secure_in_loop_because_no_login_required
if upload_to_update.secure?
upload_ids_to_mark_as_secure << upload_to_update.id
else
upload_ids_to_mark_as_not_secure << upload_to_update.id
end
end
i += 1
end
RakeHelpers.print_status_with_label("Analysis complete! ", i, uploads_to_update.count)
puts ""
[upload_ids_to_mark_as_secure, upload_ids_to_mark_as_not_secure, posts_to_rebake, uploads_to_adjust_acl_for]
end
def inline_uploads(post)
replaced = false

View File

@ -15,6 +15,11 @@
# on the current secure? status, otherwise there would be a lot of additional
# complex queries and joins to perform. Over time more of these specific
# queries will be implemented.
#
# NOTE: When updating this to add more cases where uploads will be marked
# secure, consider uploads:secure_upload_analyse_and_update as well, which
# does not use this class directly but uses an SQL version of its rules for
# efficient updating of many uploads in bulk.
class UploadSecurity
@@custom_public_types = []

View File

@ -27,5 +27,11 @@ describe Jobs::SyncAclsForUploads do
Discourse.store.expects(:update_upload_ACL).times(3)
run_job
end
it "handles updates throwing an exception" do
Discourse.store.expects(:update_upload_ACL).raises(StandardError).then.returns(true, true).times(3)
Discourse.expects(:warn_exception).once
run_job
end
end
end

View File

@ -65,6 +65,24 @@ RSpec.describe "tasks/uploads" do
expect(upload3.reload.access_control_post).to eq(post3)
end
it "sets everything attached to a post as secure and rebakes all those posts if login is required" do
SiteSetting.login_required = true
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
post3.update_columns(baked_at: 1.week.ago)
invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
expect(upload2.reload.secure).to eq(true)
expect(upload1.reload.secure).to eq(true)
expect(upload3.reload.secure).to eq(true)
end
it "sets the uploads that are media and attachments in the read restricted topic category to secure" do
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
invoke_task
@ -192,8 +210,12 @@ RSpec.describe "tasks/uploads" do
end
it "updates the affected ACLs" do
FileStore::S3Store.any_instance.expects(:update_upload_ACL).times(4)
invoke_task
expect_enqueued_with(
job: :sync_acls_for_uploads,
args: { upload_ids: [upload1.id, upload2.id, upload3.id, upload4.id] },
) do
invoke_task
end
end
end
end