2018-01-19 11:51:42 -05:00
|
|
|
require "db_helper"
|
2013-06-15 05:29:20 -04:00
|
|
|
require "digest/sha1"
|
2018-01-19 11:51:42 -05:00
|
|
|
require "base62"
|
2013-06-15 05:29:20 -04:00
|
|
|
|
2016-04-11 14:42:40 -04:00
|
|
|
################################################################################
|
|
|
|
# gather #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Gathers uploads for the site selected through the RAILS_DB environment
# variable, or for every site when RAILS_DB is not set.
task "uploads:gather" => :environment do
  ENV["RAILS_DB"] ? gather_uploads : gather_uploads_for_all_sites
end
|
|
|
|
|
|
|
|
# Runs gather_uploads once per multisite connection.
def gather_uploads_for_all_sites
  RailsMultisite::ConnectionManagement.each_connection { gather_uploads }
end
|
|
|
|
|
2016-04-13 10:33:00 -04:00
|
|
|
# Tells whether +path+ points at an existing, non-empty file.
#
# Any error raised while checking (e.g. a nil path) is treated as "no".
#
# @param path [String] file system path to check
# @return [Boolean]
def file_exists?(path)
  # File.exists? was removed in Ruby 3.2; the bare rescue below would turn the
  # resulting NoMethodError into a silent `false` for every path.
  File.exist?(path) && File.size(path) > 0
rescue
  false
end
|
|
|
|
|
2016-04-12 10:00:25 -04:00
|
|
|
# Brings local uploads stored under another site's directory
# (/uploads/<other_db>/...) into the current site's directory.
#
# For each matching Upload the file is hard-linked (`cp --link`) to the new
# location when it is not already there, then every reference to the old URL
# is remapped in the database. Prints "." per success and "!" per failure.
def gather_uploads
  public_directory = "#{Rails.root}/public"
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "", "Gathering uploads for '#{current_db}'...", ""

  Upload.where("url ~ '^\/uploads\/'")
    .where("url !~ '^\/uploads\/#{current_db}'")
    .find_each do |upload|
    begin
      old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
      from = upload.url.dup
      to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
      source = "#{public_directory}#{from}"
      destination = "#{public_directory}#{to}"

      # create destination directory & copy file unless it already exists
      unless file_exists?(destination)
        FileUtils.mkdir_p(File.dirname(destination))
        # argv form: no shell involved, so quotes or other shell
        # metacharacters in file names cannot break the command
        system("cp", "--link", source, destination)
      end

      # ensure file has been successfully copied over
      raise unless file_exists?(destination)

      # remap links in db
      DbHelper.remap(from, to)
    rescue
      putc "!"
    else
      putc "."
    end
  end

  puts "", "Done!"
end
|
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# backfill_shas #
|
|
|
|
################################################################################
|
|
|
|
|
2013-06-15 05:29:20 -04:00
|
|
|
# For every site, computes and stores the SHA1 of each Upload whose sha1
# column is NULL. Uploads that cannot be processed are reported and skipped.
task "uploads:backfill_shas" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    puts "Backfilling #{db}..."

    Upload.where(sha1: nil).find_each do |u|
      begin
        path = Discourse.store.path_for(u)
        u.sha1 = Upload.generate_digest(path)
        u.save!
        putc "."
      rescue => e
        puts "Skipping #{u.original_filename} (#{u.url}) #{e.message}"
      end
    end
  end

  puts "", "Done"
end
|
2014-06-24 09:35:15 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# migrate_from_s3 #
|
|
|
|
################################################################################
|
|
|
|
|
2014-06-24 09:35:15 -04:00
|
|
|
# Migrates uploads from S3 back to local storage for the site selected
# through RAILS_DB, or for every site when RAILS_DB is not set.
task "uploads:migrate_from_s3" => :environment do
  ENV["RAILS_DB"] ? migrate_from_s3 : migrate_all_from_s3
end
|
2014-06-24 09:35:15 -04:00
|
|
|
|
2016-07-25 06:12:10 -04:00
|
|
|
# Tries to find the original file name of a remote upload.
#
# Sources, in order of preference:
#   1. the `filename="..."` part of the Content-Disposition response header
#   2. the text of an `<a class="attachment">` link to +url+ found in +raw+
#   3. the basename of +url+
#
# @param url [String] protocol-relative URL ("//host/path"); "http:" is prepended
# @param raw [String] raw post content searched for an attachment link
# @return [String, nil] the guessed file name, or nil when anything raised
#   (including a failed download)
def guess_filename(url, raw)
  uri = URI.parse("http:#{url}")
  f = uri.open("rb", read_timeout: 5, redirect: true, allow_redirections: :all)

  filename =
    if f.meta && f.meta["content-disposition"]
      f.meta["content-disposition"][/filename="([^"]+)"/, 1].presence
    end

  filename ||= raw[/<a class="attachment" href="(?:https?:)?#{Regexp.escape(url)}">([^<]+)<\/a>/, 1].presence
  filename ||= File.basename(url)
  filename
rescue
  nil
ensure
  # `f` is nil when parsing/opening failed; closing is best-effort
  f.try(:close!) rescue nil
end
|
2015-03-18 13:23:55 -04:00
|
|
|
|
2016-07-25 06:12:10 -04:00
|
|
|
# Runs migrate_from_s3 once per multisite connection.
def migrate_all_from_s3
  RailsMultisite::ConnectionManagement.each_connection { migrate_from_s3 }
end
|
2015-03-18 13:23:55 -04:00
|
|
|
|
2016-07-25 06:12:10 -04:00
|
|
|
# Re-creates S3-hosted uploads referenced by posts as local uploads and
# rewrites the posts accordingly.
#
# Does nothing (besides printing a notice) while S3 uploads are enabled.
# Two kinds of references are rewritten in each post's raw:
#   * full "//...amazonaws.com/(original|optimized)/.../<sha1>..." URLs
#   * "upload://..." short URLs whose Upload record has a "//" (remote) URL
#
# Output legend: "#" post updated, "." post unchanged, "X" post failed.
# A failure while handling a single URL leaves that URL untouched.
def migrate_from_s3
  require "file_store/s3_store"

  # make sure S3 is disabled
  if SiteSetting.Upload.enable_s3_uploads
    puts "You must disable S3 uploads before running that task."
    return
  end

  db = RailsMultisite::ConnectionManagement.current_db

  puts "Migrating uploads from S3 to local storage for '#{db}'..."

  max_file_size = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max.kilobytes

  Post
    .where("user_id > 0")
    .where("raw LIKE '%.s3%.amazonaws.com/%' OR raw LIKE '%(upload://%'")
    .find_each do |post|
    begin
      updated = false

      # full S3 URLs
      post.raw.gsub!(/(\/\/[\w.-]+amazonaws\.com\/(original|optimized)\/([a-z0-9]+\/)+\h{40}([\w.-]+)?)/i) do |url|
        begin
          if filename = guess_filename(url, post.raw)
            file = FileHelper.download("http:#{url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
            sha1 = Upload.generate_digest(file)
            origin = nil

            # NOTE(review): the remote record is destroyed before the local one
            # is created - presumably so the creator does not reuse it; confirm.
            existing_upload = Upload.find_by(sha1: sha1)
            if existing_upload&.url&.start_with?("//")
              filename = existing_upload.original_filename
              origin = existing_upload.origin
              existing_upload.destroy
            end

            new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
            if new_upload&.save
              updated = true
              url = new_upload.url
            end
          end

          url
        rescue
          # keep the original URL when anything goes wrong
          url
        end
      end

      # upload:// short URLs
      post.raw.gsub!(/(upload:\/\/[0-9a-zA-Z]+\.\w+)/) do |url|
        begin
          if sha1 = Upload.sha1_from_short_url(url)
            if upload = Upload.find_by(sha1: sha1)
              if upload.url.start_with?("//")
                file = FileHelper.download("http:#{upload.url}", max_file_size: max_file_size, tmp_file_name: "from_s3", follow_redirect: true)
                filename = upload.original_filename
                origin = upload.origin
                upload.destroy

                new_upload = UploadCreator.new(file, filename, origin: origin).create_for(post.user_id || -1)
                if new_upload&.save
                  updated = true
                  url = new_upload.url
                end
              end
            end
          end

          url
        rescue
          # keep the original URL when anything goes wrong
          url
        end
      end

      if updated
        post.save!
        post.rebake!
        putc "#"
      else
        putc "."
      end
    rescue
      putc "X"
    end
  end

  puts "Done!"
end
|
2014-09-29 12:31:53 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# migrate_to_s3 #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Migrates local uploads to S3 for the site selected through RAILS_DB, or for
# every site when RAILS_DB is not set.
task "uploads:migrate_to_s3" => :environment do
  ENV["RAILS_DB"] ? migrate_to_s3 : migrate_to_s3_all_sites
end
|
|
|
|
|
|
|
|
# Runs migrate_to_s3 once per multisite connection.
def migrate_to_s3_all_sites
  RailsMultisite::ConnectionManagement.each_connection { migrate_to_s3 }
end
|
|
|
|
|
|
|
|
# Uploads every file under public/uploads/<db>/original to S3 and, when all
# files are accounted for, remaps the upload URLs stored in the database.
#
# Environment variables read here:
#   DRY_RUN                 - when set, only prints what would be done
#   SKIP_MULTISITE_PREFIX   - see the NOTE on `prefix` below
#   DISCOURSE_S3_BUCKET, DISCOURSE_S3_REGION,
#   DISCOURSE_S3_ACCESS_KEY_ID, DISCOURSE_S3_SECRET_ACCESS_KEY
#
# Exits the process with status 1 (uploads not on the new scheme),
# 2 (S3 not configured) or 3 (no S3 CDN URL).
def migrate_to_s3
  db = RailsMultisite::ConnectionManagement.current_db

  dry_run = !!ENV["DRY_RUN"]

  puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
  puts "Migrating uploads to S3 for '#{db}'..."

  if Upload.where("url NOT LIKE '//%' AND url NOT LIKE '/uploads/#{db}/original/_X/%'").exists?
    puts <<~TEXT
      Some uploads were not migrated to the new scheme. Please run these commands in the rails console

      SiteSetting.migrate_to_new_scheme = true
      Jobs::MigrateUploadScheme.new.execute(nil)
    TEXT
    exit 1
  end

  unless GlobalSetting.use_s3?
    puts <<~TEXT
      Please provide the following environment variables
      - DISCOURSE_S3_BUCKET
      - DISCOURSE_S3_REGION
      - DISCOURSE_S3_ACCESS_KEY_ID
      - DISCOURSE_S3_SECRET_ACCESS_KEY
    TEXT
    exit 2
  end

  if SiteSetting.Upload.s3_cdn_url.blank?
    puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable"
    exit 3
  end

  bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"

  s3 = Aws::S3::Client.new(
    region: ENV["DISCOURSE_S3_REGION"],
    access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"],
    secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"])

  if bucket_has_folder_path
    bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"])
    folder = File.join(folder, "/")
  else
    bucket, folder = ENV["DISCOURSE_S3_BUCKET"], ""
  end

  puts "Uploading files to S3..."
  print " - Listing local files"

  local_files = []
  # block form so the pipe is closed once the listing has been read
  IO.popen("cd public && find uploads/#{db}/original -type f") do |io|
    io.each_line do |file|
      local_files << file.chomp
      putc "." if local_files.size % 1000 == 0
    end
  end

  puts " => #{local_files.size} files"
  print " - Listing S3 files"

  s3_objects = []

  # NOTE(review): the multisite-style prefix is used when SKIP_MULTISITE_PREFIX
  # is set, which reads inverted relative to the variable name - confirm.
  prefix =
    if ENV["SKIP_MULTISITE_PREFIX"] || Rails.configuration.multisite
      "uploads/#{db}/original/"
    else
      "original/"
    end

  list_options = { bucket: bucket, prefix: folder + prefix }

  loop do
    response = s3.list_objects_v2(list_options)
    s3_objects.concat(response.contents)
    putc "."
    break if response.next_continuation_token.blank?
    list_options[:continuation_token] = response.next_continuation_token
  end

  puts " => #{s3_objects.size} files"
  puts " - Syncing files to S3"

  synced = 0
  failed = []

  # Index once by key instead of scanning every S3 object for every local file.
  # Matching on the computed key (which includes the bucket folder, as the
  # listed keys do) also lets already-synced files be skipped when the bucket
  # has a folder path.
  s3_objects_by_key = s3_objects.index_by(&:key)

  local_files.each do |file|
    path = File.join("public", file)
    name = File.basename(path)
    etag = Digest::MD5.file(path).hexdigest
    key = file[file.index(prefix)..-1]
    key.prepend(folder) if bucket_has_folder_path

    if s3_object = s3_objects_by_key[key]
      # already on S3 with identical size and MD5: nothing to do
      next if File.size(path) == s3_object.size && s3_object.etag[etag]
    end

    put_options = {
      acl: "public-read",
      bucket: bucket,
      content_type: MiniMime.lookup_by_filename(name)&.content_type,
      key: key,
    }

    if !FileHelper.is_supported_image?(name)
      upload = Upload.find_by(url: "/#{file}")

      if upload&.original_filename
        put_options[:content_disposition] =
          %Q{attachment; filename="#{upload.original_filename}"}
      end
    end

    if dry_run
      puts "#{file} => #{put_options[:key]}"
      synced += 1
    elsif File.open(path, "rb") { |body| s3.put_object(put_options.merge(body: body)).etag[etag] }
      # the file is opened only for a real upload and closed right after it
      putc "."
      synced += 1
    else
      putc "X"
      failed << path
    end
  end

  puts

  if failed.size > 0
    puts "Failed to upload #{failed.size} files"
    puts failed.join("\n")
  elsif s3_objects.size + synced >= local_files.size
    puts "Updating the URLs in the database..."

    from = "/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"

    if dry_run
      puts "REPLACING '#{from}' WITH '#{to}'"
    else
      DbHelper.remap(from, to, anchor_left: true)
    end

    if Discourse.asset_host.present?
      # Uploads that were on local CDN will now be on S3 CDN
      from = "#{Discourse.asset_host}/uploads/#{db}/original/"
      to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

      if dry_run
        puts "REMAPPING '#{from}' TO '#{to}'"
      else
        DbHelper.remap(from, to)
      end
    end

    # Uploads that were on base hostname will now be on S3 CDN
    from = "#{Discourse.base_url}/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

    if dry_run
      puts "REMAPPING '#{from}' TO '#{to}'"
    else
      DbHelper.remap(from, to)
    end
  end

  puts "Done!"
end
|
|
|
|
|
|
|
|
################################################################################
|
2018-12-26 11:34:49 -05:00
|
|
|
# clean_up #
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
|
2014-09-29 12:31:53 -04:00
|
|
|
# Cleans up uploads for the site selected through RAILS_DB, or for every site
# when RAILS_DB is not set.
task "uploads:clean_up" => :environment do
  ENV["RAILS_DB"] ? clean_up_uploads : clean_up_uploads_all_sites
end
|
|
|
|
|
|
|
|
# Runs clean_up_uploads once per multisite connection.
def clean_up_uploads_all_sites
  RailsMultisite::ConnectionManagement.each_connection { clean_up_uploads }
end
|
2014-09-29 12:31:53 -04:00
|
|
|
|
2016-09-02 02:50:13 -04:00
|
|
|
# Reconciles upload records with the files under public/uploads/<db>.
#
# Steps:
#   1. optionally takes a full backup (interactive Y/N prompt on STDIN)
#   2. destroys Upload / OptimizedImage records whose file is missing
#   3. deletes the legacy "avatars" directory
#   4. deletes files that no Upload / OptimizedImage references by sha1 or url
#   5. removes empty directories
#
# Exits with status 1 when the store is external or the backup fails.
# Output legend: "#" removed, "." kept.
def clean_up_uploads
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Cleaning up uploads and thumbnails for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    exit 1
  end

  puts <<~OUTPUT
    This task will remove upload records and files permanently.

    Would you like to take a full backup before the clean up? (Y/N)
  OUTPUT

  # STDIN.gets returns nil on EOF (e.g. non-interactive run); treat it as "no"
  if STDIN.gets.to_s.chomp.downcase == 'y'
    puts "Starting backup..."
    backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
    backuper.run
    exit 1 unless backuper.success
  end

  public_directory = Rails.root.join("public").to_s

  ##
  ## DATABASE vs FILE SYSTEM
  ##

  # uploads & avatars
  Upload.find_each do |upload|
    path = File.join(public_directory, upload.url)

    if !File.exist?(path)
      upload.destroy!
      putc "#"
    else
      putc "."
    end
  end

  # optimized images
  OptimizedImage.find_each do |optimized_image|
    path = File.join(public_directory, optimized_image.url)

    if !File.exist?(path)
      optimized_image.destroy!
      putc "#"
    else
      putc "."
    end
  end

  ##
  ## FILE SYSTEM vs DATABASE
  ##

  uploads_directory = File.join(public_directory, 'uploads', db).to_s

  # avatars (no avatar should be stored in that old directory)
  FileUtils.rm_rf("#{uploads_directory}/avatars")

  # uploads and optimized images
  Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
    # the glob also matches directories whose name contains a dot
    next unless File.file?(file_path)

    sha1 = Upload.generate_digest(file_path)
    url = file_path.split(public_directory, 2)[1]

    referenced =
      Upload.where(sha1: sha1).exists? ||
      Upload.where(url: url).exists? ||
      OptimizedImage.where(sha1: sha1).exists? ||
      OptimizedImage.where(url: url).exists?

    if referenced
      putc "."
    else
      FileUtils.rm(file_path)
      putc "#"
    end
  end

  puts "Removing empty directories..."
  # argv form: the directory path is never interpreted by a shell
  puts IO.popen(["find", uploads_directory, "-type", "d", "-empty", "-exec", "rmdir", "{}", ";"], &:read)

  puts "Done!"
end
|
2015-05-10 20:30:22 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# missing #
|
|
|
|
################################################################################
|
2015-05-10 20:30:22 -04:00
|
|
|
|
|
|
|
# list all missing uploads and optimized images
#
# Runs for the site selected through RAILS_DB, or for every site when RAILS_DB
# is not set. SKIP_OPTIMIZED is forwarded as the `skip_optimized` option.
task "uploads:missing" => :environment do
  if ENV["RAILS_DB"]
    list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
  else
    RailsMultisite::ConnectionManagement.each_connection do
      list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'])
    end
  end
end
|
2015-05-10 20:30:22 -04:00
|
|
|
|
2018-03-12 08:08:53 -04:00
|
|
|
# Delegates to the current store's list_missing_uploads.
#
# @param skip_optimized [Object] forwarded unchanged to the store
def list_missing_uploads(skip_optimized: false)
  Discourse.store.list_missing_uploads(skip_optimized: skip_optimized)
end
|
2015-05-11 06:59:50 -04:00
|
|
|
|
2016-09-21 04:50:08 -04:00
|
|
|
################################################################################
|
|
|
|
# Recover from tombstone #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Recovers uploads from the tombstone for the site selected through RAILS_DB,
# or for every site when RAILS_DB is not set.
task "uploads:recover_from_tombstone" => :environment do
  if ENV["RAILS_DB"]
    recover_from_tombstone
  else
    RailsMultisite::ConnectionManagement.each_connection { recover_from_tombstone }
  end
end
|
|
|
|
|
|
|
|
# Restores uploads from public/uploads/tombstone/<db> that posts still
# reference.
#
# For each tombstoned file two kinds of references are handled:
#   * posts whose raw contains the file name: every <img src> and
#     <a class="attachment" href> pointing to a "/uploads/" URL without an
#     Upload record is re-created from the tombstone and remapped
#   * posts whose raw contains the file's "upload://" short URL: the file is
#     re-created and the short URL remapped if it changed
#
# Upload size limits and authorized extensions are relaxed while the task runs
# and restored afterwards. Only works with an internal (local) store.
def recover_from_tombstone
  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  # Captured before the `begin` so the `ensure` below can never write nil back
  # into the settings if something fails early.
  previous_image_size = SiteSetting.max_image_size_kb
  previous_attachment_size = SiteSetting.max_attachment_size_kb
  previous_extensions = SiteSetting.authorized_extensions

  begin
    SiteSetting.max_image_size_kb = 10 * 1024
    SiteSetting.max_attachment_size_kb = 10 * 1024
    SiteSetting.authorized_extensions = "*"

    current_db = RailsMultisite::ConnectionManagement.current_db
    public_path = Rails.root.join("public")
    paths = Dir.glob(File.join(public_path, 'uploads', 'tombstone', current_db, '**', '*.*'))
    max = paths.size

    paths.each_with_index do |path, index|
      filename = File.basename(path)
      printf("%9d / %d (%5.1f%%)\n", (index + 1), max, (((index + 1).to_f / max.to_f) * 100).round(1))

      Post.where("raw LIKE ?", "%#{filename}%").find_each do |post|
        doc = Nokogiri::HTML::fragment(post.raw)
        updated = false

        image_urls = doc.css("img[src]").map { |img| img["src"] }
        attachment_urls = doc.css("a.attachment[href]").map { |a| a["href"] }

        (image_urls + attachment_urls).each do |url|
          next if !url.start_with?("/uploads/")
          next if Upload.exists?(url: url)

          puts "Restoring #{path}..."
          tombstone_path = File.join(public_path, 'uploads', 'tombstone', url.gsub(/^\/uploads\//, ""))

          if File.exist?(tombstone_path)
            File.open(tombstone_path) do |file|
              new_upload = UploadCreator.new(file, File.basename(url)).create_for(Discourse::SYSTEM_USER_ID)

              if new_upload.persisted?
                puts "Restored into #{new_upload.url}"
                DbHelper.remap(url, new_upload.url)
                updated = true
              else
                puts "Failed to create upload for #{url}: #{new_upload.errors.full_messages}."
              end
            end
          else
            puts "Failed to find file (#{tombstone_path}) in tombstone."
          end
        end

        post.rebake! if updated
      end

      # the file name without its extension is used as the upload's sha1
      sha1 = File.basename(filename, File.extname(filename))
      short_url = "upload://#{Base62.encode(sha1.hex)}"

      Post.where("raw LIKE ?", "%#{short_url}%").find_each do |post|
        puts "Restoring #{path}..."

        File.open(path) do |file|
          new_upload = UploadCreator.new(file, filename).create_for(Discourse::SYSTEM_USER_ID)

          if new_upload.persisted?
            puts "Restored into #{new_upload.short_url}"
            DbHelper.remap(short_url, new_upload.short_url) if short_url != new_upload.short_url
            post.rebake!
          else
            puts "Failed to create upload for #{filename}: #{new_upload.errors.full_messages}."
          end
        end
      end
    end
  ensure
    SiteSetting.max_image_size_kb = previous_image_size
    SiteSetting.max_attachment_size_kb = previous_attachment_size
    SiteSetting.authorized_extensions = previous_extensions
  end
end
|
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# regenerate_missing_optimized #
|
|
|
|
################################################################################
|
|
|
|
|
2015-05-11 06:59:50 -04:00
|
|
|
# regenerate missing optimized images
#
# Runs for the site selected through RAILS_DB, or for every site when RAILS_DB
# is not set.
task "uploads:regenerate_missing_optimized" => :environment do
  if ENV["RAILS_DB"]
    regenerate_missing_optimized
  else
    RailsMultisite::ConnectionManagement.each_connection { regenerate_missing_optimized }
  end
end
|
|
|
|
|
|
|
|
# Re-creates optimized image files that are missing or empty on disk.
#
# Avatar thumbnails are processed first, then every other optimized image that
# has a url and positive dimensions. When the original file is missing too and
# the upload has an origin, the original is re-downloaded first.
#
# Output legend: "#" regenerated, "X" original unavailable, "." already there.
# The originals that could not be found are listed at the end.
# Only works with an internal (local) store.
def regenerate_missing_optimized
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Regenerating missing optimized images for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  public_directory = "#{Rails.root}/public"
  missing_uploads = Set.new
  non_empty_file = ->(file) { File.exist?(file) && File.size(file) > 0 }

  avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact

  default_scope = OptimizedImage.includes(:upload)

  # Hash conditions are used instead of "IN (?)" / "NOT IN (?)" fragments: with
  # an empty id list the latter becomes "NOT IN (NULL)" and matches no row.
  [
    default_scope
      .where(upload_id: avatar_upload_ids),

    default_scope
      .where.not(upload_id: avatar_upload_ids)
      .where("LENGTH(COALESCE(url, '')) > 0")
      .where("width > 0 AND height > 0")
  ].each do |scope|
    scope.find_each do |optimized_image|
      upload = optimized_image.upload

      # only local urls ("/..." but not protocol-relative "//...")
      next unless optimized_image.url =~ /^\/[^\/]/
      next unless upload.url =~ /^\/[^\/]/

      thumbnail = "#{public_directory}#{optimized_image.url}"
      original = "#{public_directory}#{upload.url}"

      if !non_empty_file.call(thumbnail)
        # make sure the original image exists locally
        if !non_empty_file.call(original) && upload.origin.present?
          # try to fix it by redownloading it
          begin
            downloaded = FileHelper.download(
              upload.origin,
              max_file_size: SiteSetting.max_image_size_kb.kilobytes,
              tmp_file_name: "discourse-missing",
              follow_redirect: true
            ) rescue nil

            if downloaded && downloaded.size > 0
              FileUtils.mkdir_p(File.dirname(original))
              File.open(original, "wb") { |f| f.write(downloaded.read) }
            end
          ensure
            downloaded.try(:close!) if downloaded.respond_to?(:close!)
          end
        end

        if non_empty_file.call(original)
          FileUtils.mkdir_p(File.dirname(thumbnail))
          OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
          putc "#"
        else
          missing_uploads << original
          putc "X"
        end
      else
        putc "."
      end
    end
  end

  puts "", "Done"

  if missing_uploads.size > 0
    puts "Missing uploads:"
    missing_uploads.sort.each { |u| puts u }
  end
end
|
2015-05-19 06:31:51 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
2015-06-12 06:02:36 -04:00
|
|
|
# migrate_to_new_scheme #
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
|
2015-06-12 06:02:36 -04:00
|
|
|
# Turns the `migrate_to_new_scheme` site setting on.
task "uploads:start_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = true
  puts "Migration started!"
end
|
|
|
|
|
2015-06-12 06:02:36 -04:00
|
|
|
# Turns the `migrate_to_new_scheme` site setting off.
task "uploads:stop_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = false
  puts "Migration stopped!"
end
|
2016-09-01 03:19:14 -04:00
|
|
|
|
|
|
|
# Prints a report about the current site's uploads: record counts, size and
# count of files per extension, and the users with the largest uploads.
#
# Arguments:
#   cache_path - optional path of an existing listing with one
#                "<size> <path>" entry per line; when omitted the listing is
#                generated with `find` into /tmp
#   limit      - number of users to list (default 10); must be an integer
task "uploads:analyze", [:cache_path, :limit] => :environment do |_, args|
  now = Time.zone.now
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "Analyzing uploads for '#{current_db}'... This may take awhile...\n"

  uploads_path = Rails.root.join('public', 'uploads', current_db)

  path = args[:cache_path]
  unless path
    path = "/tmp/#{current_db}-#{now.to_i}-paths.txt"
    # touch the very file that is read below, so it exists even if find fails
    FileUtils.touch(path)
    # argv form with output redirection: no shell parses the paths
    system("find", uploads_path.to_s, "-type", "f", "-printf", "%s %h/%f\n", out: path)
  end

  extensions = Hash.new { |stats, ext| stats[ext] = { "count" => 0, "size" => 0 } }
  paths_count = 0

  File.readlines(path).each do |line|
    size, file_path = line.split(" ", 2)

    paths_count += 1
    # file_path still ends with the line's newline, hence the chomp
    extension = File.extname(file_path).chomp.downcase
    extensions[extension]["count"] += 1
    extensions[extension]["size"] += size.to_i
  end

  uploads_count = Upload.count
  optimized_images_count = OptimizedImage.count

  puts <<~REPORT
    Report for '#{current_db}'
    -----------#{'-' * current_db.length}
    Number of `Upload` records in DB: #{uploads_count}
    Number of `OptimizedImage` records in DB: #{optimized_images_count}
    **Total DB records: #{uploads_count + optimized_images_count}**

    Number of images in uploads folder: #{paths_count}
    ------------------------------------#{'-' * paths_count.to_s.length}

  REPORT

  helper = Class.new do
    include ActionView::Helpers::NumberHelper
  end

  helper = helper.new

  printf "%-15s | %-15s | %-15s\n", 'extname', 'total size', 'count'
  puts "-" * 45

  extensions.sort_by { |_, value| value['size'] }.reverse_each do |extname, value|
    printf "%-15s | %-15s | %-15s\n", extname, helper.number_to_human_size(value['size']), value['count']
  end

  puts "\n"

  # Integer() rejects anything that is not a number before it reaches the SQL
  limit = Integer(args[:limit] || 10)

  sql = <<~SQL
    SELECT
      users.username,
      COUNT(uploads.user_id) AS num_of_uploads,
      SUM(uploads.filesize) AS total_size_of_uploads,
      COUNT(optimized_images.id) AS num_of_optimized_images
    FROM users
    INNER JOIN uploads ON users.id = uploads.user_id
    INNER JOIN optimized_images ON uploads.id = optimized_images.upload_id
    GROUP BY users.id
    ORDER BY total_size_of_uploads DESC
    LIMIT #{limit}
  SQL

  puts "Users using the most disk space"
  puts "-------------------------------\n"
  printf "%-25s | %-25s | %-25s | %-25s\n", 'username', 'total size of uploads', 'number of uploads', 'number of optimized images'
  puts "-" * 110

  # DB.query yields one object per row; query_single would flatten all the
  # columns of all the rows into a single array of values.
  DB.query(sql).each do |row|
    printf "%-25s | %-25s | %-25s | %-25s\n", row.username, helper.number_to_human_size(row.total_size_of_uploads), row.num_of_uploads, row.num_of_optimized_images
  end

  puts "\n"
  puts "List of file paths @ #{path}"
  puts "Duration: #{Time.zone.now - now} seconds"
end
|
2018-08-08 01:14:52 -04:00
|
|
|
|
|
|
|
# Loads UploadFixer and runs UploadFixer.fix_all_extensions.
task "uploads:fix_incorrect_extensions" => :environment do
  require_dependency "upload_fixer"
  UploadFixer.fix_all_extensions
end
|
2018-09-05 04:54:15 -04:00
|
|
|
|
2018-09-12 04:51:53 -04:00
|
|
|
# Runs UploadRecovery for the site selected through RAILS_DB, or for every
# site when RAILS_DB is not set. Setting DRY_RUN to a non-blank value passes
# `dry_run: true` to UploadRecovery.
task "uploads:recover" => :environment do
  require_dependency "upload_recovery"

  dry_run = ENV["DRY_RUN"].present?

  if ENV["RAILS_DB"]
    UploadRecovery.new(dry_run: dry_run).recover
  else
    RailsMultisite::ConnectionManagement.each_connection do
      UploadRecovery.new(dry_run: dry_run).recover
    end
  end
end
|