FEATURE: Upload to s3 in parallel to speed up backup restores (#13391)
Uploading lots of small files can be made significantly faster by parallelizing the `s3.put_object` calls. In testing, an UPLOAD_CONCURRENCY of 10 made a large restore 10x faster. An UPLOAD_CONCURRENCY of 20 made the same restore 18x faster. This commit is careful to parallelize as little as possible, to reduce the chance of concurrency issues. In the worker threads, no database transactions are performed. All modification of shared objects is controlled with a mutex. Unfortunately we do not have any existing tests for the `ToS3Migration` class. This change has been tested with a large site backup (120k uploads totalling 45GB).
This commit is contained in:
parent
03fc31e23b
commit
b0416cb1c1
|
@ -7,6 +7,7 @@ module FileStore
|
||||||
|
|
||||||
class ToS3Migration
|
class ToS3Migration
|
||||||
MISSING_UPLOADS_RAKE_TASK_NAME ||= 'posts:missing_uploads'
|
MISSING_UPLOADS_RAKE_TASK_NAME ||= 'posts:missing_uploads'
|
||||||
|
UPLOAD_CONCURRENCY ||= 20
|
||||||
|
|
||||||
def initialize(s3_options:, dry_run: false, migrate_to_multisite: false, skip_etag_verify: false)
|
def initialize(s3_options:, dry_run: false, migrate_to_multisite: false, skip_etag_verify: false)
|
||||||
|
|
||||||
|
@ -197,9 +198,25 @@ module FileStore
|
||||||
log " => #{s3_objects.size} files"
|
log " => #{s3_objects.size} files"
|
||||||
log " - Syncing files to S3"
|
log " - Syncing files to S3"
|
||||||
|
|
||||||
|
queue = Queue.new
|
||||||
synced = 0
|
synced = 0
|
||||||
failed = []
|
failed = []
|
||||||
|
|
||||||
|
lock = Mutex.new
|
||||||
|
upload_threads = UPLOAD_CONCURRENCY.times.map do
|
||||||
|
Thread.new do
|
||||||
|
while obj = queue.pop
|
||||||
|
if s3.put_object(obj[:options]).etag[obj[:etag]]
|
||||||
|
putc "."
|
||||||
|
lock.synchronize { synced += 1 }
|
||||||
|
else
|
||||||
|
putc "X"
|
||||||
|
lock.synchronize { failed << obj[:path] }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
local_files.each do |file|
|
local_files.each do |file|
|
||||||
path = File.join(public_directory, file)
|
path = File.join(public_directory, file)
|
||||||
name = File.basename(path)
|
name = File.basename(path)
|
||||||
|
@ -244,15 +261,14 @@ module FileStore
|
||||||
if @dry_run
|
if @dry_run
|
||||||
log "#{file} => #{options[:key]}"
|
log "#{file} => #{options[:key]}"
|
||||||
synced += 1
|
synced += 1
|
||||||
elsif s3.put_object(options).etag[etag]
|
|
||||||
putc "."
|
|
||||||
synced += 1
|
|
||||||
else
|
else
|
||||||
putc "X"
|
queue << { path: path, options: options, etag: etag }
|
||||||
failed << path
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
queue.close
|
||||||
|
upload_threads.each(&:join)
|
||||||
|
|
||||||
puts
|
puts
|
||||||
|
|
||||||
failure_message = "S3 migration failed for db '#{@current_db}'."
|
failure_message = "S3 migration failed for db '#{@current_db}'."
|
||||||
|
|
Loading…
Reference in New Issue