DEV: Secure upload rake task improvements (#29484)
This commit changes the uploads:secure_upload_analyse_and_update and uploads:disable_secure_uploads to no longer rebake affected posts inline. This just took way too long, and if the task stalled you couldn't be sure if the rest of it completed. Instead, we can update the baked_version of affected posts and utilize our PeriodicalUpdates job to gradually rebake them. I added warnings about increasing the site setting rebake_old_posts_count and the global setting max_old_rebakes_per_15_minutes before doing this as well. For good measure, the affected post IDs are written to a JSON file too.
This commit is contained in:
parent
d5b328b193
commit
1a3b9a7352
|
@ -545,6 +545,10 @@ task "uploads:sync_s3_acls" => :environment do
|
|||
end
|
||||
end
|
||||
|
||||
def secure_upload_rebake_warning
|
||||
puts "This task may mark a lot of posts for rebaking. To get through these quicker, the max_old_rebakes_per_15_minutes global setting (current value #{GlobalSetting.max_old_rebakes_per_15_minutes}) should be changed and the rebake_old_posts_count site setting (current value #{SiteSetting.rebake_old_posts_count}) increased as well. Do you want to proceed? (y/n)"
|
||||
end
|
||||
|
||||
# NOTE: This needs to be updated to use the _first_ UploadReference
|
||||
# record for each upload to determine security, and do not mark things
|
||||
# as secure if the first record is something public e.g. a site setting.
|
||||
|
@ -558,6 +562,9 @@ task "uploads:disable_secure_uploads" => :environment do
|
|||
exit 1
|
||||
end
|
||||
|
||||
secure_upload_rebake_warning
|
||||
exit 1 if STDIN.gets.chomp.downcase != "y"
|
||||
|
||||
puts "Disabling secure upload and resetting uploads to not secure in #{db}...", ""
|
||||
|
||||
SiteSetting.secure_uploads = false
|
||||
|
@ -583,8 +590,7 @@ task "uploads:disable_secure_uploads" => :environment do
|
|||
secure_upload_ids,
|
||||
)
|
||||
adjust_acls(secure_upload_ids)
|
||||
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
|
||||
log_rebake_errors(post_rebake_errors)
|
||||
mark_upload_posts_for_rebake(post_ids_to_rebake)
|
||||
|
||||
puts "", "Rebaking and uploading complete!", ""
|
||||
end
|
||||
|
@ -612,6 +618,9 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
|
|||
exit 1
|
||||
end
|
||||
|
||||
secure_upload_rebake_warning
|
||||
exit 1 if STDIN.gets.chomp.downcase != "y"
|
||||
|
||||
puts "Analyzing security for uploads in #{db}...", ""
|
||||
all_upload_ids_changed, post_ids_to_rebake = nil
|
||||
Upload.transaction do
|
||||
|
@ -627,11 +636,17 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
|
|||
# Simply mark all uploads linked to posts secure if login_required because
|
||||
# no anons will be able to access them; however if secure_uploads_pm_only is
|
||||
# true then login_required will not mark other uploads secure.
|
||||
puts "",
|
||||
"Site is login_required, and secure_uploads_pm_only is false. Continuing with strategy to mark all post uploads as secure.",
|
||||
""
|
||||
post_ids_to_rebake, all_upload_ids_changed = mark_all_as_secure_login_required
|
||||
else
|
||||
# Otherwise only mark uploads linked to posts either:
|
||||
# * In secure categories or PMs if !SiteSetting.secure_uploads_pm_only
|
||||
# * In PMs if SiteSetting.secure_uploads_pm_only
|
||||
puts "",
|
||||
"Site is not login_required. Continuing with normal strategy to mark uploads in secure contexts as secure.",
|
||||
""
|
||||
post_ids_to_rebake, all_upload_ids_changed =
|
||||
update_specific_upload_security_no_login_required
|
||||
end
|
||||
|
@ -639,8 +654,14 @@ task "uploads:secure_upload_analyse_and_update" => :environment do
|
|||
|
||||
# Enqueue rebakes AFTER upload transaction complete, so there is no race condition
|
||||
# between updating the DB and the rebakes occurring.
|
||||
post_rebake_errors = rebake_upload_posts(post_ids_to_rebake)
|
||||
log_rebake_errors(post_rebake_errors)
|
||||
#
|
||||
# This is done asynchronously by changing the baked_version to NULL on
|
||||
# affected posts and relying on Post.rebake_old in the PeriodicalUpdates
|
||||
# job. To speed this up, these levers can be adjusted:
|
||||
#
|
||||
# * SiteSetting.rebake_old_posts_count
|
||||
# * GlobalSetting.max_old_rebakes_per_15_minutes
|
||||
mark_upload_posts_for_rebake(post_ids_to_rebake)
|
||||
|
||||
# Also do this AFTER upload transaction complete so we don't end up with any
|
||||
# errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
|
||||
|
@ -677,7 +698,7 @@ def mark_all_as_secure_login_required
|
|||
security_last_changed_reason = 'upload security rake task mark as secure',
|
||||
security_last_changed_at = NOW()
|
||||
FROM upl
|
||||
WHERE uploads.id = upl.upload_id AND NOT uploads.secure
|
||||
WHERE uploads.id = upl.upload_id
|
||||
RETURNING uploads.id
|
||||
SQL
|
||||
puts "Marked #{post_upload_ids_marked_secure.count} upload(s) as secure because login_required is true.",
|
||||
|
@ -687,7 +708,7 @@ def mark_all_as_secure_login_required
|
|||
SET secure = false,
|
||||
security_last_changed_reason = 'upload security rake task mark as not secure',
|
||||
security_last_changed_at = NOW()
|
||||
WHERE id NOT IN (?) AND uploads.secure
|
||||
WHERE id NOT IN (?)
|
||||
RETURNING uploads.id
|
||||
SQL
|
||||
puts "Marked #{upload_ids_marked_not_secure.count} upload(s) as not secure because they are not linked to posts.",
|
||||
|
@ -797,24 +818,20 @@ def update_uploads_access_control_post
|
|||
SQL
|
||||
end
|
||||
|
||||
def rebake_upload_posts(post_ids_to_rebake)
|
||||
def mark_upload_posts_for_rebake(post_ids_to_rebake)
|
||||
posts_to_rebake = Post.where(id: post_ids_to_rebake)
|
||||
post_rebake_errors = []
|
||||
puts "", "Rebaking #{posts_to_rebake.length} posts with affected uploads.", ""
|
||||
begin
|
||||
i = 0
|
||||
posts_to_rebake.each do |post|
|
||||
RakeHelpers.print_status_with_label("Rebaking posts.....", i, posts_to_rebake.length)
|
||||
post.rebake!
|
||||
i += 1
|
||||
end
|
||||
|
||||
RakeHelpers.print_status_with_label("Rebaking complete! ", i, posts_to_rebake.length)
|
||||
puts ""
|
||||
rescue => e
|
||||
post_rebake_errors << e.message
|
||||
end
|
||||
post_rebake_errors
|
||||
puts "",
|
||||
"Marking #{posts_to_rebake.length} posts with affected uploads for rebake. Every 15 minutes, a batch of these will be enqueued for rebaking.",
|
||||
""
|
||||
posts_to_rebake.update_all(baked_version: nil)
|
||||
File.write(
|
||||
"secure_upload_analyse_and_update_posts_for_rebake.json",
|
||||
MultiJson.dump({ post_ids: post_ids_to_rebake }),
|
||||
)
|
||||
puts "",
|
||||
"Post IDs written to secure_upload_analyse_and_update_posts_for_rebake.json for reference",
|
||||
""
|
||||
end
|
||||
|
||||
def inline_uploads(post)
|
||||
|
|
|
@ -5,25 +5,26 @@ RSpec.describe "tasks/uploads" do
|
|||
Rake::Task.clear
|
||||
Discourse::Application.load_tasks
|
||||
SiteSetting.authorized_extensions += "|pdf"
|
||||
STDIN.stubs(:gets).returns("y\n")
|
||||
end
|
||||
|
||||
describe "uploads:secure_upload_analyse_and_update" do
|
||||
let!(:uploads) { [multi_post_upload1, upload1, upload2, upload3] }
|
||||
let(:multi_post_upload1) { Fabricate(:upload_s3) }
|
||||
let(:upload1) { Fabricate(:upload_s3) }
|
||||
let(:upload2) { Fabricate(:upload_s3) }
|
||||
let(:upload3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") }
|
||||
let!(:uploads) { [multi_post_upload_1, upload_1, upload_2, upload_3] }
|
||||
let(:multi_post_upload_1) { Fabricate(:upload_s3) }
|
||||
let(:upload_1) { Fabricate(:upload_s3) }
|
||||
let(:upload_2) { Fabricate(:upload_s3) }
|
||||
let(:upload_3) { Fabricate(:upload_s3, original_filename: "test.pdf", extension: "pdf") }
|
||||
|
||||
let!(:post1) { Fabricate(:post) }
|
||||
let!(:post2) { Fabricate(:post) }
|
||||
let!(:post3) { Fabricate(:post) }
|
||||
let!(:post_1) { Fabricate(:post) }
|
||||
let!(:post_2) { Fabricate(:post) }
|
||||
let!(:post_3) { Fabricate(:post) }
|
||||
|
||||
before do
|
||||
UploadReference.create(target: post1, upload: multi_post_upload1)
|
||||
UploadReference.create(target: post2, upload: multi_post_upload1)
|
||||
UploadReference.create(target: post2, upload: upload1)
|
||||
UploadReference.create(target: post3, upload: upload2)
|
||||
UploadReference.create(target: post3, upload: upload3)
|
||||
UploadReference.create(target: post_1, upload: multi_post_upload_1)
|
||||
UploadReference.create(target: post_2, upload: multi_post_upload_1)
|
||||
UploadReference.create(target: post_2, upload: upload_1)
|
||||
UploadReference.create(target: post_3, upload: upload_2)
|
||||
UploadReference.create(target: post_3, upload: upload_3)
|
||||
end
|
||||
|
||||
def invoke_task
|
||||
|
@ -48,105 +49,121 @@ RSpec.describe "tasks/uploads" do
|
|||
|
||||
it "sets an access_control_post for each post upload, using the first linked post in the case of multiple links" do
|
||||
invoke_task
|
||||
expect(multi_post_upload1.reload.access_control_post).to eq(post1)
|
||||
expect(upload1.reload.access_control_post).to eq(post2)
|
||||
expect(upload2.reload.access_control_post).to eq(post3)
|
||||
expect(upload3.reload.access_control_post).to eq(post3)
|
||||
expect(multi_post_upload_1.reload.access_control_post).to eq(post_1)
|
||||
expect(upload_1.reload.access_control_post).to eq(post_2)
|
||||
expect(upload_2.reload.access_control_post).to eq(post_3)
|
||||
expect(upload_3.reload.access_control_post).to eq(post_3)
|
||||
end
|
||||
|
||||
context "when login_required" do
|
||||
before { SiteSetting.login_required = true }
|
||||
|
||||
it "sets everything attached to a post as secure and rebakes all those posts" do
|
||||
freeze_time
|
||||
|
||||
post1.update_columns(baked_at: 1.week.ago)
|
||||
post2.update_columns(baked_at: 1.week.ago)
|
||||
post3.update_columns(baked_at: 1.week.ago)
|
||||
after do
|
||||
if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
|
||||
File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
|
||||
end
|
||||
end
|
||||
|
||||
it "sets everything attached to a post as secure" do
|
||||
invoke_task
|
||||
|
||||
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(upload2.reload.secure).to eq(true)
|
||||
expect(upload1.reload.secure).to eq(true)
|
||||
expect(upload3.reload.secure).to eq(true)
|
||||
expect(upload_2.reload.secure).to eq(true)
|
||||
expect(upload_1.reload.secure).to eq(true)
|
||||
expect(upload_3.reload.secure).to eq(true)
|
||||
end
|
||||
|
||||
it "writes a file with the post IDs to rebake" do
|
||||
invoke_task
|
||||
|
||||
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(
|
||||
true,
|
||||
)
|
||||
expect(
|
||||
JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json")),
|
||||
).to eq({ "post_ids" => [post_1.id, post_2.id, post_3.id] })
|
||||
end
|
||||
|
||||
it "sets the baked_version to NULL for affected posts" do
|
||||
invoke_task
|
||||
|
||||
expect(post_1.reload.baked_version).to eq(nil)
|
||||
expect(post_2.reload.baked_version).to eq(nil)
|
||||
expect(post_3.reload.baked_version).to eq(nil)
|
||||
end
|
||||
|
||||
context "when secure_uploads_pm_only" do
|
||||
before { SiteSetting.secure_uploads_pm_only = true }
|
||||
|
||||
it "only sets everything attached to a private message post as secure and rebakes all those posts" do
|
||||
freeze_time
|
||||
|
||||
post1.update_columns(baked_at: 1.week.ago)
|
||||
post2.update_columns(baked_at: 1.week.ago)
|
||||
post3.update_columns(baked_at: 1.week.ago)
|
||||
post3.topic.update(archetype: "private_message", category: nil)
|
||||
post_3.topic.update(archetype: "private_message", category: nil)
|
||||
|
||||
invoke_task
|
||||
|
||||
expect(post1.reload.baked_at).to eq_time(1.week.ago)
|
||||
expect(post2.reload.baked_at).to eq_time(1.week.ago)
|
||||
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(upload1.reload.secure).to eq(false)
|
||||
expect(upload2.reload.secure).to eq(true)
|
||||
expect(upload3.reload.secure).to eq(true)
|
||||
expect(post_1.reload.baked_version).not_to eq(nil)
|
||||
expect(post_2.reload.baked_version).not_to eq(nil)
|
||||
expect(post_3.reload.baked_version).to eq(nil)
|
||||
expect(upload_1.reload.secure).to eq(false)
|
||||
expect(upload_2.reload.secure).to eq(true)
|
||||
expect(upload_3.reload.secure).to eq(true)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "when secure_uploads_pm_only" do
|
||||
before { SiteSetting.secure_uploads_pm_only = true }
|
||||
|
||||
it "sets a non-PM post upload to not secure" do
|
||||
upload_2.update!(secure: true)
|
||||
invoke_task
|
||||
expect(upload_2.reload.secure).to eq(false)
|
||||
end
|
||||
end
|
||||
|
||||
it "sets the uploads that are media and attachments in the read restricted topic category to secure" do
|
||||
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
|
||||
post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
|
||||
invoke_task
|
||||
expect(upload2.reload.secure).to eq(true)
|
||||
expect(upload1.reload.secure).to eq(false)
|
||||
expect(upload3.reload.secure).to eq(true)
|
||||
expect(upload_2.reload.secure).to eq(true)
|
||||
expect(upload_1.reload.secure).to eq(false)
|
||||
expect(upload_3.reload.secure).to eq(true)
|
||||
end
|
||||
|
||||
it "sets the upload in the PM topic to secure" do
|
||||
post3.topic.update(archetype: "private_message", category: nil)
|
||||
post_3.topic.update(archetype: "private_message", category: nil)
|
||||
invoke_task
|
||||
expect(upload2.reload.secure).to eq(true)
|
||||
expect(upload1.reload.secure).to eq(false)
|
||||
expect(upload_2.reload.secure).to eq(true)
|
||||
expect(upload_1.reload.secure).to eq(false)
|
||||
end
|
||||
|
||||
it "rebakes the posts attached for uploads that change secure status" do
|
||||
post3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
|
||||
freeze_time
|
||||
|
||||
post1.update_columns(baked_at: 1.week.ago)
|
||||
post2.update_columns(baked_at: 1.week.ago)
|
||||
post3.update_columns(baked_at: 1.week.ago)
|
||||
it "sets the baked_version version to NULL for the posts attached for uploads that change secure status" do
|
||||
post_3.topic.update(category: Fabricate(:private_category, group: Fabricate(:group)))
|
||||
|
||||
invoke_task
|
||||
|
||||
expect(post1.reload.baked_at).to eq_time(1.week.ago)
|
||||
expect(post2.reload.baked_at).to eq_time(1.week.ago)
|
||||
expect(post3.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(post_1.reload.baked_version).not_to eq(nil)
|
||||
expect(post_2.reload.baked_version).not_to eq(nil)
|
||||
expect(post_3.reload.baked_version).to eq(nil)
|
||||
end
|
||||
|
||||
context "for an upload that is already secure and does not need to change" do
|
||||
before do
|
||||
post3.topic.update(archetype: "private_message", category: nil)
|
||||
upload2.update(access_control_post: post3)
|
||||
upload2.update_secure_status
|
||||
upload3.update(access_control_post: post3)
|
||||
upload3.update_secure_status
|
||||
post_3.topic.update(archetype: "private_message", category: nil)
|
||||
upload_2.update(access_control_post: post_3)
|
||||
upload_2.update_secure_status
|
||||
upload_3.update(access_control_post: post_3)
|
||||
upload_3.update_secure_status
|
||||
end
|
||||
|
||||
it "does not rebake the associated post" do
|
||||
freeze_time
|
||||
|
||||
post3.update_columns(baked_at: 1.week.ago)
|
||||
post_3.update_columns(baked_at: 1.week.ago)
|
||||
invoke_task
|
||||
|
||||
expect(post3.reload.baked_at).to eq_time(1.week.ago)
|
||||
expect(post_3.reload.baked_at).to eq_time(1.week.ago)
|
||||
end
|
||||
|
||||
it "does not attempt to update the acl" do
|
||||
FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload2).never
|
||||
FileStore::S3Store.any_instance.expects(:update_upload_ACL).with(upload_2).never
|
||||
invoke_task
|
||||
end
|
||||
end
|
||||
|
@ -179,19 +196,25 @@ RSpec.describe "tasks/uploads" do
|
|||
uploads.each { |upload| stub_upload(upload) }
|
||||
|
||||
SiteSetting.secure_uploads = true
|
||||
UploadReference.create(target: post1, upload: upload1)
|
||||
UploadReference.create(target: post1, upload: upload2)
|
||||
UploadReference.create(target: post2, upload: upload3)
|
||||
UploadReference.create(target: post2, upload: upload4)
|
||||
UploadReference.create(target: post_1, upload: upload_1)
|
||||
UploadReference.create(target: post_1, upload: upload_2)
|
||||
UploadReference.create(target: post_2, upload: upload_3)
|
||||
UploadReference.create(target: post_2, upload: upload4)
|
||||
end
|
||||
|
||||
let!(:uploads) { [upload1, upload2, upload3, upload4, upload5] }
|
||||
let(:post1) { Fabricate(:post) }
|
||||
let(:post2) { Fabricate(:post) }
|
||||
let(:upload1) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
|
||||
let(:upload2) { Fabricate(:upload_s3, secure: true, access_control_post: post1) }
|
||||
let(:upload3) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
|
||||
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post2) }
|
||||
after do
|
||||
if File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")
|
||||
File.delete("secure_upload_analyse_and_update_posts_for_rebake.json")
|
||||
end
|
||||
end
|
||||
|
||||
let!(:uploads) { [upload_1, upload_2, upload_3, upload4, upload5] }
|
||||
let(:post_1) { Fabricate(:post) }
|
||||
let(:post_2) { Fabricate(:post) }
|
||||
let(:upload_1) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
|
||||
let(:upload_2) { Fabricate(:upload_s3, secure: true, access_control_post: post_1) }
|
||||
let(:upload_3) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
|
||||
let(:upload4) { Fabricate(:upload_s3, secure: true, access_control_post: post_2) }
|
||||
let(:upload5) { Fabricate(:upload_s3, secure: false) }
|
||||
|
||||
it "disables the secure upload setting" do
|
||||
|
@ -201,24 +224,29 @@ RSpec.describe "tasks/uploads" do
|
|||
|
||||
it "updates all secure uploads to secure: false" do
|
||||
invoke_task
|
||||
[upload1, upload2, upload3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) }
|
||||
[upload_1, upload_2, upload_3, upload4].each { |upl| expect(upl.reload.secure).to eq(false) }
|
||||
end
|
||||
|
||||
it "rebakes the associated posts" do
|
||||
freeze_time
|
||||
|
||||
post1.update_columns(baked_at: 1.week.ago)
|
||||
post2.update_columns(baked_at: 1.week.ago)
|
||||
it "sets the baked_version to NULL for affected posts" do
|
||||
invoke_task
|
||||
|
||||
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
|
||||
expect(post_1.reload.baked_version).to eq(nil)
|
||||
expect(post_2.reload.baked_version).to eq(nil)
|
||||
end
|
||||
|
||||
it "writes a file with the post IDs to rebake" do
|
||||
invoke_task
|
||||
|
||||
expect(File.exist?("secure_upload_analyse_and_update_posts_for_rebake.json")).to eq(true)
|
||||
expect(JSON.parse(File.read("secure_upload_analyse_and_update_posts_for_rebake.json"))).to eq(
|
||||
{ "post_ids" => [post_1.id, post_2.id] },
|
||||
)
|
||||
end
|
||||
|
||||
it "updates the affected ACLs via the SyncAclsForUploads job" do
|
||||
invoke_task
|
||||
expect(Jobs::SyncAclsForUploads.jobs.last["args"][0]["upload_ids"]).to match_array(
|
||||
[upload1.id, upload2.id, upload3.id, upload4.id],
|
||||
[upload_1.id, upload_2.id, upload_3.id, upload4.id],
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue