FIX: remove migrate_from_s3 task that silently corrupts data (#11703)
Transient errors in migration are ignored, silently corrupting data, and the migration is incomplete and misses many sources of uploads, which will lead to an incorrect expectation of independence from the remote object storage after announcing that the migration was successful, regardles of whether transient errors permanently corrupted the data. Remove this migration until such time as it is re-written to follow the same pattern has the migration to s3, moving the core logic out of the task.
This commit is contained in:
parent
869d25b7d3
commit
2a23e54632
|
@ -85,131 +85,6 @@ task "uploads:backfill_shas" => :environment do
|
||||||
puts "", "Done"
|
puts "", "Done"
|
||||||
end
|
end
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# migrate_from_s3 #
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
task "uploads:migrate_from_s3" => :environment do
|
|
||||||
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
|
|
||||||
end
|
|
||||||
|
|
||||||
task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
|
|
||||||
ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit])
|
|
||||||
end
|
|
||||||
|
|
||||||
def guess_filename(url, raw)
|
|
||||||
begin
|
|
||||||
uri = URI.parse("http:#{url}")
|
|
||||||
f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
|
|
||||||
filename = if f.meta && f.meta["content-disposition"]
|
|
||||||
f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
|
|
||||||
end
|
|
||||||
filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
|
|
||||||
filename ||= File.basename(url)
|
|
||||||
filename
|
|
||||||
rescue
|
|
||||||
nil
|
|
||||||
ensure
|
|
||||||
f.try(:close!) rescue nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def migrate_all_from_s3(limit: nil)
|
|
||||||
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) }
|
|
||||||
end
|
|
||||||
|
|
||||||
def migrate_from_s3(limit: nil)
|
|
||||||
require "file_store/s3_store"
|
|
||||||
|
|
||||||
# make sure S3 is disabled
|
|
||||||
if SiteSetting.Upload.enable_s3_uploads
|
|
||||||
puts "You must disable S3 uploads before running that task."
|
|
||||||
exit 1
|
|
||||||
end
|
|
||||||
|
|
||||||
db = RailsMultisite::ConnectionManagement.current_db
|
|
||||||
|
|
||||||
puts "Migrating uploads from S3 to local storage for '#{db}'..."
|
|
||||||
|
|
||||||
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
|
|
||||||
|
|
||||||
migrate_posts = Post
|
|
||||||
.where("user_id > 0")
|
|
||||||
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
|
|
||||||
migrate_posts = migrate_posts.limit(limit.to_i) if limit
|
|
||||||
|
|
||||||
migrate_posts.find_each do |post|
|
|
||||||
begin
|
|
||||||
updated = false
|
|
||||||
|
|
||||||
post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
|
|
||||||
begin
|
|
||||||
if filename = guess_filename(url, post.raw)
|
|
||||||
file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
|
|
||||||
sha1 = Upload.generate_digest(file)
|
|
||||||
origin = nil
|
|
||||||
|
|
||||||
existing_upload = Upload.find_by(sha1: sha1)
|
|
||||||
if existing_upload&.url&.start_with?("//")
|
|
||||||
filename = existing_upload.original_filename
|
|
||||||
origin = existing_upload.origin
|
|
||||||
existing_upload.destroy
|
|
||||||
end
|
|
||||||
|
|
||||||
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
|
|
||||||
if new_upload&.save
|
|
||||||
updated = true
|
|
||||||
url = new_upload.url
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
url
|
|
||||||
rescue
|
|
||||||
url
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
|
|
||||||
begin
|
|
||||||
if sha1 = Upload.sha1_from_short_url(url)
|
|
||||||
if upload = Upload.find_by(sha1: sha1)
|
|
||||||
if upload.url.start_with?("//")
|
|
||||||
file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
|
|
||||||
filename = upload.original_filename
|
|
||||||
origin = upload.origin
|
|
||||||
upload.destroy
|
|
||||||
|
|
||||||
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
|
|
||||||
if new_upload&.save
|
|
||||||
updated = true
|
|
||||||
url = new_upload.url
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
url
|
|
||||||
rescue
|
|
||||||
url
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if updated
|
|
||||||
post.save!
|
|
||||||
post.rebake!
|
|
||||||
putc "#"
|
|
||||||
else
|
|
||||||
putc "."
|
|
||||||
end
|
|
||||||
|
|
||||||
rescue
|
|
||||||
putc "X"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
puts "Done!"
|
|
||||||
end
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
# migrate_to_s3 #
|
# migrate_to_s3 #
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
|
@ -139,120 +139,6 @@ RSpec.describe "tasks/uploads" do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe "uploads:batch_migrate_from_s3" do
|
|
||||||
let!(:uploads) do
|
|
||||||
[
|
|
||||||
upload1,
|
|
||||||
upload2,
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:upload1) { Fabricate(:upload_s3) }
|
|
||||||
let(:upload2) { Fabricate(:upload_s3) }
|
|
||||||
|
|
||||||
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
|
|
||||||
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
|
|
||||||
|
|
||||||
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
|
|
||||||
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
|
|
||||||
|
|
||||||
before do
|
|
||||||
global_setting :s3_bucket, 'file-uploads/folder'
|
|
||||||
global_setting :s3_region, 'us-east-1'
|
|
||||||
|
|
||||||
setup_s3
|
|
||||||
uploads.each { |upload| stub_upload(upload) }
|
|
||||||
|
|
||||||
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
|
|
||||||
upload1.save!
|
|
||||||
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
|
|
||||||
upload2.save!
|
|
||||||
|
|
||||||
PostUpload.create(post: post1, upload: upload1)
|
|
||||||
PostUpload.create(post: post2, upload: upload2)
|
|
||||||
SiteSetting.enable_s3_uploads = false
|
|
||||||
end
|
|
||||||
|
|
||||||
def invoke_task
|
|
||||||
capture_stdout do
|
|
||||||
Rake::Task['uploads:batch_migrate_from_s3'].invoke('1')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
it "applies the limit" do
|
|
||||||
FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")).once()
|
|
||||||
|
|
||||||
freeze_time
|
|
||||||
|
|
||||||
post1.update_columns(baked_at: 1.week.ago)
|
|
||||||
post2.update_columns(baked_at: 1.week.ago)
|
|
||||||
invoke_task
|
|
||||||
|
|
||||||
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
|
|
||||||
expect(post2.reload.baked_at).to eq_time(1.week.ago)
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
describe "uploads:migrate_from_s3" do
|
|
||||||
let!(:uploads) do
|
|
||||||
[
|
|
||||||
upload1,
|
|
||||||
upload2,
|
|
||||||
]
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:upload1) { Fabricate(:upload_s3) }
|
|
||||||
let(:upload2) { Fabricate(:upload_s3) }
|
|
||||||
|
|
||||||
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
|
|
||||||
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
|
|
||||||
|
|
||||||
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
|
|
||||||
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
|
|
||||||
|
|
||||||
before do
|
|
||||||
global_setting :s3_bucket, 'file-uploads/folder'
|
|
||||||
global_setting :s3_region, 'us-east-1'
|
|
||||||
setup_s3
|
|
||||||
uploads.each { |upload| stub_upload(upload) }
|
|
||||||
|
|
||||||
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
|
|
||||||
upload1.save!
|
|
||||||
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
|
|
||||||
upload2.save!
|
|
||||||
|
|
||||||
PostUpload.create(post: post1, upload: upload1)
|
|
||||||
PostUpload.create(post: post2, upload: upload2)
|
|
||||||
SiteSetting.enable_s3_uploads = false
|
|
||||||
end
|
|
||||||
|
|
||||||
def invoke_task
|
|
||||||
capture_stdout do
|
|
||||||
Rake::Task['uploads:migrate_from_s3'].invoke
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
it "fails if s3 uploads are still enabled" do
|
|
||||||
SiteSetting.enable_s3_uploads = true
|
|
||||||
expect { invoke_task }.to raise_error(SystemExit)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "does not apply a limit" do
|
|
||||||
FileHelper.stubs(:download).with("http:#{upload1.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
|
|
||||||
FileHelper.stubs(:download).with("http:#{upload2.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
|
|
||||||
|
|
||||||
freeze_time
|
|
||||||
|
|
||||||
post1.update_columns(baked_at: 1.week.ago)
|
|
||||||
post2.update_columns(baked_at: 1.week.ago)
|
|
||||||
invoke_task
|
|
||||||
|
|
||||||
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
|
|
||||||
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
describe "uploads:disable_secure_media" do
|
describe "uploads:disable_secure_media" do
|
||||||
def invoke_task
|
def invoke_task
|
||||||
capture_stdout do
|
capture_stdout do
|
||||||
|
|
Loading…
Reference in New Issue