PERF: Speed up `migrate_to_s3` rake task.

* Prioritizes non-image uploads
* Does one remap per upload instead of the previous three
* After every 100 uploads migrated, does 2 remaps that fix broken
  URLs
* Excludes the email_logs table from the remap
This commit is contained in:
Guo Xiang Tan 2018-11-08 16:37:19 +08:00
parent 57f92ac808
commit 7290145641
1 changed file with 49 additions and 39 deletions

View File

@ -232,12 +232,17 @@ def migrate_to_s3
search_logs
post_search_data
notifications
email_logs
}
# Migrate all uploads
Upload.where.not(sha1: nil)
.where("url NOT LIKE '#{s3.absolute_base_url}%'")
.find_each do |upload|
file_uploads = Upload.where.not(sha1: nil).where("url NOT LIKE '#{s3.absolute_base_url}%'")
image_uploads = file_uploads.where("lower(extension) NOT IN (?)", FileHelper.supported_images.to_a)
[image_uploads, file_uploads].each do |uploads|
upload.find_in_batches(batch_size: 100) do |batch|
batch.each do |upload|
now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
# remove invalid uploads
if upload.url.blank?
upload.destroy!
@ -249,7 +254,7 @@ def migrate_to_s3
path = local.path_for(upload)
# make sure the file exists locally
if !path || !File.exists?(path)
putc "X"
puts "#{from} does not exist locally"
next
end
@ -257,25 +262,30 @@ def migrate_to_s3
file = File.open(path)
content_type = `file --mime-type -b #{path}`.strip
to = s3.store_upload(file, upload, content_type)
rescue
putc "X"
rescue => e
puts "Encountered an error while migrating #{upload.url}: #{e.class}: #{e.message}"
next
ensure
file&.close
end
# remap the URL
[
[UrlHelper.absolute(from), Discourse.store.cdn_url(to)],
[UrlHelper.absolute_without_cdn(from), Discourse.store.cdn_url(to)],
[from, to],
].each do |from_url, to_url|
DbHelper.remap(from_url, to_url, exclude_tables: exclude_tables)
DbHelper.remap(from, to, exclude_tables: exclude_tables)
upload.optimized_images.destroy_all
puts "Migrating #{from} --> #{to} took #{Process.clock_gettime(Process::CLOCK_MONOTONIC) - now} seconds"
end
upload.optimized_images.destroy_all
putc "."
[
Discourse.asset_host,
Discourse.base_url_no_prefix
].each do |from|
now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
from = "#{from}#{SiteSetting.Upload.s3_base_url}"
to = SiteSetting.s3_cdn_url
DbHelper.remap(from, to, exclude_tables: exclude_tables)
puts "Remapping #{from} --> #{to} took #{Process.clock_gettime(Process::CLOCK_MONOTONIC) - now} seconds"
end
end
end
end