FIX: remove migrate_from_s3 task that silently corrupts data (#11703)

Transient errors in migration are ignored, silently corrupting
data, and the migration is incomplete and misses many sources of
uploads, which will lead to an incorrect expectation of independence
from the remote object storage after announcing that the migration
was successful, regardles of whether transient errors permanently
corrupted the data.

Remove this migration until such time as it is re-written to
follow the same pattern has the migration to s3, moving the
core logic out of the task.
This commit is contained in:
Michael K Johnson 2021-01-17 16:33:29 -05:00 committed by GitHub
parent 869d25b7d3
commit 2a23e54632
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 0 additions and 239 deletions

View File

@ -85,131 +85,6 @@ task "uploads:backfill_shas" => :environment do
puts "", "Done" puts "", "Done"
end end
################################################################################
# migrate_from_s3 #
################################################################################
task "uploads:migrate_from_s3" => :environment do
ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end
task "uploads:batch_migrate_from_s3", [:limit] => :environment do |_, args|
ENV["RAILS_DB"] ? migrate_from_s3(limit: args[:limit]) : migrate_all_from_s3(limit: args[:limit])
end
def guess_filename(url, raw)
begin
uri = URI.parse("http:#{url}")
f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)
filename = if f.meta && f.meta["content-disposition"]
f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
end
filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
filename ||= File.basename(url)
filename
rescue
nil
ensure
f.try(:close!) rescue nil
end
end
def migrate_all_from_s3(limit: nil)
RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3(limit: limit) }
end
def migrate_from_s3(limit: nil)
require "file_store/s3_store"
# make sure S3 is disabled
if SiteSetting.Upload.enable_s3_uploads
puts "You must disable S3 uploads before running that task."
exit 1
end
db = RailsMultisite::ConnectionManagement.current_db
puts "Migrating uploads from S3 to local storage for '#{db}'..."
max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes
migrate_posts = Post
.where("user_id > 0")
.where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%#{SiteSetting.Upload.absolute_base_url}%' OR raw LIKE '%(upload://%'")
migrate_posts = migrate_posts.limit(limit.to_i) if limit
migrate_posts.find_each do |post|
begin
updated = false
post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
begin
if filename = guess_filename(url, post.raw)
file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
sha1 = Upload.generate_digest(file)
origin = nil
existing_upload = Upload.find_by(sha1: sha1)
if existing_upload&.url&.start_with?("//")
filename = existing_upload.original_filename
origin = existing_upload.origin
existing_upload.destroy
end
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
url
rescue
url
end
end
post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
begin
if sha1 = Upload.sha1_from_short_url(url)
if upload = Upload.find_by(sha1: sha1)
if upload.url.start_with?("//")
file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
filename = upload.original_filename
origin = upload.origin
upload.destroy
new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
if new_upload&.save
updated = true
url = new_upload.url
end
end
end
end
url
rescue
url
end
end
if updated
post.save!
post.rebake!
putc "#"
else
putc "."
end
rescue
putc "X"
end
end
puts "Done!"
end
################################################################################ ################################################################################
# migrate_to_s3 # # migrate_to_s3 #
################################################################################ ################################################################################

View File

@ -139,120 +139,6 @@ RSpec.describe "tasks/uploads" do
end end
end end
describe "uploads:batch_migrate_from_s3" do
let!(:uploads) do
[
upload1,
upload2,
]
end
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
before do
global_setting :s3_bucket, 'file-uploads/folder'
global_setting :s3_region, 'us-east-1'
setup_s3
uploads.each { |upload| stub_upload(upload) }
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
upload1.save!
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
upload2.save!
PostUpload.create(post: post1, upload: upload1)
PostUpload.create(post: post2, upload: upload2)
SiteSetting.enable_s3_uploads = false
end
def invoke_task
capture_stdout do
Rake::Task['uploads:batch_migrate_from_s3'].invoke('1')
end
end
it "applies the limit" do
FileHelper.stubs(:download).returns(file_from_fixtures("logo.png")).once()
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).to eq_time(1.week.ago)
end
end
describe "uploads:migrate_from_s3" do
let!(:uploads) do
[
upload1,
upload2,
]
end
let(:upload1) { Fabricate(:upload_s3) }
let(:upload2) { Fabricate(:upload_s3) }
let!(:url1) { "upload://#{upload1.base62_sha1}.jpg" }
let!(:url2) { "upload://#{upload2.base62_sha1}.jpg" }
let(:post1) { Fabricate(:post, raw: "[foo](#{url1})") }
let(:post2) { Fabricate(:post, raw: "[foo](#{url2})") }
before do
global_setting :s3_bucket, 'file-uploads/folder'
global_setting :s3_region, 'us-east-1'
setup_s3
uploads.each { |upload| stub_upload(upload) }
upload1.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload1.base62_sha1}.png"
upload1.save!
upload2.url = "//#{SiteSetting.s3_upload_bucket}.amazonaws.com/original/1X/#{upload2.base62_sha1}.png"
upload2.save!
PostUpload.create(post: post1, upload: upload1)
PostUpload.create(post: post2, upload: upload2)
SiteSetting.enable_s3_uploads = false
end
def invoke_task
capture_stdout do
Rake::Task['uploads:migrate_from_s3'].invoke
end
end
it "fails if s3 uploads are still enabled" do
SiteSetting.enable_s3_uploads = true
expect { invoke_task }.to raise_error(SystemExit)
end
it "does not apply a limit" do
FileHelper.stubs(:download).with("http:#{upload1.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
FileHelper.stubs(:download).with("http:#{upload2.url}", max_file_size: 4194304, tmp_file_name: "from_s3", follow_redirect: true).returns(file_from_fixtures("logo.png")).once()
freeze_time
post1.update_columns(baked_at: 1.week.ago)
post2.update_columns(baked_at: 1.week.ago)
invoke_task
expect(post1.reload.baked_at).not_to eq_time(1.week.ago)
expect(post2.reload.baked_at).not_to eq_time(1.week.ago)
end
end
describe "uploads:disable_secure_media" do describe "uploads:disable_secure_media" do
def invoke_task def invoke_task
capture_stdout do capture_stdout do