FEATURE: Add uploads:batch_migrate_from_s3 task to limit total posts migrated at once (#9933)

Allow limiting the number of posts migrated at once, both so that a large
migration can be spread across multiple off-peak usage hours to reduce the
impact on users, and so that tests can migrate only a very small number of
posts. ("Give me a ping, Vasili. One ping only, please.")
This commit is contained in:
Michael K Johnson 2020-06-03 19:48:11 -04:00 committed by GitHub
parent a89574ccb9
commit 81e6bc7a0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 122 additions and 7 deletions

View File

@@ -93,6 +93,10 @@ task "uploads:migrate_from_s3" => :environment do
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end
# Migrate uploads back from S3, capped at args[:limit] posts.
# Invoke as: rake "uploads:batch_migrate_from_s3[100]"
# With RAILS_DB set, only that database is migrated; otherwise all sites are.
task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
  if ENV["RAILS_DB"]
    migrate_from_s3(limit: args[:limit])
  else
    migrate_all_from_s3(limit: args[:limit])
  end
end
def guess_filename(url, raw)
begin
uri = URI.parse("http:#{url}")
@@ -110,17 +114,17 @@ def guess_filename(url, raw)
end
end
def migrate_all_from_s3
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3 }
# Run the S3 migration against every site in the multisite cluster,
# forwarding an optional per-site post limit to migrate_from_s3.
def migrate_all_from_s3(limit: nil)
  RailsMultisite::ConnectionManagement.each_connection do
    migrate_from_s3(limit: limit)
  end
end
def migrate_from_s3
def migrate_from_s3(limit: nil)
require "file_store/s3_store"
# make sure S3 is disabled
if SiteSetting.Upload.enable_s3_uploads
puts "You must disable S3 uploads before running that task."
return
exit 1
end
db = RailsMultisite::ConnectionManagement.current_db
@@ -129,10 +133,12 @@ def migrate_from_s3
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
Post
migrate_posts = Post
.where("user_id > 0")
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%(upload://%'")
.find_each do |post|
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
migrate_posts = migrate_posts.limit(limit.to_i) if limit
migrate_posts.find_each do |post|
begin
updated = false

View File

@@ -134,6 +134,115 @@ RSpec.describe "tasks/uploads" do
end
end
# Covers the batched task: with a limit of 1, only one of two eligible posts
# should be rebaked.
describe "uploads:batch_migrate_from_s3" do
# Eager (let!) so both uploads exist before the `before` block runs.
let!(:uploads) do
[
upload1,
upload2,
]
end
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
# One post per upload, each referencing its upload:// short URL in raw.
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
before do
global_setting :s3_bucket, 'file-uploads/folder'
global_setting :s3_region, 'us-east-1'
enable_s3_uploads(uploads)
# Point each upload at an S3-style URL so the task's raw-LIKE query matches.
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
upload1.save!
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
upload2.save!
PostUpload.create(post: post1, upload: upload1)
PostUpload.create(post: post2, upload: upload2)
# The task refuses to run while S3 uploads are enabled, so disable them.
SiteSetting.enable_s3_uploads = false
end
# Runs the task with a limit of 1 (passed as the rake task argument string).
def invoke_task
capture_stdout do
Rake::Task['uploads:batch_migrate_from_s3'].invoke('1')
end
end
it "applies the limit" do
# Expect exactly one download: the limit should stop after the first post.
FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")).once()
freeze_time
# Backdate baked_at so a rebake is detectable as a timestamp change.
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
invoke_task
# Only the first post is rebaked; the second stays untouched.
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).to eq_time(1.week.ago)
end
end
# Covers the unbatched task: no limit argument, so every eligible post is
# rebaked; also covers the guard that aborts while S3 uploads are enabled.
describe "uploads:migrate_from_s3" do
# Eager (let!) so both uploads exist before the `before` block runs.
let!(:uploads) do
[
upload1,
upload2,
]
end
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
# One post per upload, each referencing its upload:// short URL in raw.
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
before do
global_setting :s3_bucket, 'file-uploads/folder'
global_setting :s3_region, 'us-east-1'
enable_s3_uploads(uploads)
# Point each upload at an S3-style URL so the task's raw-LIKE query matches.
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
upload1.save!
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
upload2.save!
PostUpload.create(post: post1, upload: upload1)
PostUpload.create(post: post2, upload: upload2)
# The task refuses to run while S3 uploads are enabled, so disable them.
SiteSetting.enable_s3_uploads = false
end
# Runs the task with no limit argument.
def invoke_task
capture_stdout do
Rake::Task['uploads:migrate_from_s3'].invoke
end
end
it "fails if s3 uploads are still enabled" do
SiteSetting.enable_s3_uploads = true
# The task calls `exit 1` in this case, which raises SystemExit.
expect { invoke_task }.to raise_error(SystemExit)
end
it "does not apply a limit" do
# Both uploads must be downloaded exactly once — nothing is skipped.
FileHelper.stubs(:download).with("http:#{upload1.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
FileHelper.stubs(:download).with("http:#{upload2.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
freeze_time
# Backdate baked_at so a rebake is detectable as a timestamp change.
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
invoke_task
# With no limit, both posts get rebaked.
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
end
end
describe "uploads:disable_secure_media" do
def invoke_task
capture_stdout do