2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2018-01-19 11:51:42 -05:00
|
|
|
require "db_helper"
|
2013-06-15 05:29:20 -04:00
|
|
|
require "digest/sha1"
|
2018-01-19 11:51:42 -05:00
|
|
|
require "base62"
|
2013-06-15 05:29:20 -04:00
|
|
|
|
2016-04-11 14:42:40 -04:00
|
|
|
################################################################################
|
|
|
|
# gather #
|
|
|
|
################################################################################
|
|
|
|
|
2022-03-21 10:28:52 -04:00
|
|
|
require "rake_helpers"
|
2019-11-17 20:25:42 -05:00
|
|
|
|
2016-04-11 14:42:40 -04:00
|
|
|
# Gathers uploads for a single site when RAILS_DB is set, otherwise for every
# site in turn.
task "uploads:gather" => :environment do
  if ENV["RAILS_DB"]
    gather_uploads
  else
    gather_uploads_for_all_sites
  end
end
|
|
|
|
|
|
|
|
# Runs gather_uploads once per site connection.
def gather_uploads_for_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    gather_uploads
  end
end
|
|
|
|
|
2016-04-13 10:33:00 -04:00
|
|
|
# Returns true when +path+ exists and has a size greater than zero.
# Any StandardError raised while checking is swallowed and reported as false.
def file_exists?(path)
  begin
    return false unless File.exist?(path)
    File.size(path).positive?
  rescue
    false
  end
end
|
|
|
|
|
2016-04-12 10:00:25 -04:00
|
|
|
# Moves uploads whose URL points at another site's directory
# ("/uploads/<other_db>/...") under the current site's directory and remaps
# the URLs in the database.
#
# For each matching upload the file is hard-linked (`cp --link`) to its new
# location unless a non-empty file is already there, and the old URL is then
# rewritten through DbHelper.remap. One progress character is printed per
# upload: "." on success, "!" when any step raised.
def gather_uploads
  public_directory = "#{Rails.root}/public"
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "", "Gathering uploads for '#{current_db}'...", ""

  Upload.where("url ~ '^\/uploads\/'")
    .where("url !~ '^\/uploads\/#{current_db}'")
    .find_each do |upload|
    begin
      old_db = upload.url[/^\/uploads\/([^\/]+)\//, 1]
      from = upload.url.dup
      to = upload.url.sub("/uploads/#{old_db}/", "/uploads/#{current_db}/")
      source = "#{public_directory}#{from}"
      destination = "#{public_directory}#{to}"

      # create destination directory & copy file unless it already exists.
      # No shell is involved: the paths are passed as plain arguments, so
      # quotes or other metacharacters in a path cannot break or inject
      # commands.
      unless file_exists?(destination)
        FileUtils.mkdir_p(File.dirname(destination))
        system("cp", "--link", source, destination)
      end

      # ensure file has been successfully copied over
      raise unless file_exists?(destination)

      # remap links in db
      DbHelper.remap(from, to)
    rescue
      putc "!"
    else
      putc "."
    end
  end

  puts "", "Done!"
end
|
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# backfill_shas #
|
|
|
|
################################################################################
|
|
|
|
|
2013-06-15 05:29:20 -04:00
|
|
|
# Fills in the sha1 of every upload that has none, on every site.
#
# The digest is computed from the file at Discourse.store.path_for(upload).
# Secure uploads get a random sha1 and keep the computed digest in
# original_sha1; all other uploads store the digest in sha1 and clear
# original_sha1. An upload that raises is reported and skipped.
task "uploads:backfill_shas" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |site|
    puts "Backfilling #{site}..."

    Upload.where(sha1: nil).find_each do |upload|
      begin
        file_path = Discourse.store.path_for(upload)
        digest = Upload.generate_digest(file_path)

        if upload.secure?
          upload.sha1 = SecureRandom.hex(20)
          upload.original_sha1 = digest
        else
          upload.sha1 = digest
          upload.original_sha1 = nil
        end

        upload.save!
        putc "."
      rescue => error
        puts "Skipping #{upload.original_filename} (#{upload.url}) #{error.message}"
      end
    end
  end

  puts "", "Done"
end
|
2014-06-24 09:35:15 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# migrate_to_s3 #
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# Migrates uploads to S3 after an interactive confirmation on STDIN.
# With RAILS_DB set only that site is migrated; otherwise every site is.
task "uploads:migrate_to_s3" => :environment do
  STDOUT.puts("Please note that migrating to S3 is currently not reversible! \n[CTRL+c] to cancel, [ENTER] to continue")
  STDIN.gets

  if ENV["RAILS_DB"]
    migrate_to_s3
  else
    migrate_to_s3_all_sites
  end
end
|
|
|
|
|
|
|
|
# Runs migrate_to_s3 for every site connection.
#
# A RuntimeError from one site is printed and skipped when the SKIP_FAILED
# environment variable is set; otherwise it is re-raised and stops the run.
def migrate_to_s3_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    begin
      migrate_to_s3
    rescue RuntimeError => error
      raise error unless ENV["SKIP_FAILED"]
      puts error
    end
  end
end
|
|
|
|
|
2020-01-28 16:10:25 -05:00
|
|
|
# Builds a FileStore::ToS3Migration configured from the environment.
#
# S3 options come from FileStore::ToS3Migration.s3_options_from_env; the
# DRY_RUN, MIGRATE_TO_MULTISITE and SKIP_ETAG_VERIFY variables are each turned
# into a boolean that is true whenever the variable is set at all.
def create_migration
  env_flag = ->(name) { !!ENV[name] }

  FileStore::ToS3Migration.new(
    s3_options: FileStore::ToS3Migration.s3_options_from_env,
    dry_run: env_flag.call("DRY_RUN"),
    migrate_to_multisite: env_flag.call("MIGRATE_TO_MULTISITE"),
    skip_etag_verify: env_flag.call("SKIP_ETAG_VERIFY")
  )
end
|
|
|
|
|
|
|
|
# Builds a migration from the environment and runs it for the current site.
def migrate_to_s3
  migration = create_migration
  migration.migrate
end
|
|
|
|
|
|
|
|
# Reports whether the sites are ready to be moved to a new host.
#
# Exits with status 1 when more than 50 Sidekiq jobs are queued or when any
# site's migration reports itself as unsuccessful.
task "uploads:s3_migration_status" => :environment do
  all_migrated = true

  RailsMultisite::ConnectionManagement.each_connection do
    # Once one site has failed, the remaining sites are no longer checked.
    all_migrated = create_migration.migration_successful? if all_migrated
  end

  backlog = Sidekiq::Stats.new.queues.sum { |_queue, size| size }
  if backlog > 50
    puts "WARNING: There are #{backlog} jobs queued! Wait till Sidekiq clears backlog prior to migrating site to a new host"
    exit 1
  end

  unless all_migrated
    puts "Site is not ready for migration"
    exit 1
  end

  puts "All sites appear to have uploads in order!"
end
|
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
2018-12-26 11:34:49 -05:00
|
|
|
# clean_up #
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
|
2014-09-29 12:31:53 -04:00
|
|
|
# Cleans up uploads for the RAILS_DB site when that variable is set,
# otherwise for every site.
task "uploads:clean_up" => :environment do
  if ENV["RAILS_DB"]
    clean_up_uploads
  else
    clean_up_uploads_all_sites
  end
end
|
|
|
|
|
|
|
|
# Runs clean_up_uploads once per site connection.
def clean_up_uploads_all_sites
  RailsMultisite::ConnectionManagement.each_connection do
    clean_up_uploads
  end
end
|
2014-09-29 12:31:53 -04:00
|
|
|
|
2016-09-02 02:50:13 -04:00
|
|
|
# Reconciles upload records with the files on local disk for the current site.
#
# Steps, in order:
#   1. Refuses to run (exit 1) when the store is external.
#   2. Offers to take a full backup first; exits with 1 if that backup fails.
#   3. Destroys Upload and OptimizedImage records whose file is missing.
#   4. Removes the site's "avatars" directory, then deletes every file under
#      the site's uploads directory that no Upload or OptimizedImage refers to
#      by sha1 or by url.
#   5. Removes the directories left empty.
#
# Progress is printed as "#" for each removed record/file and "." for each
# one that was kept.
def clean_up_uploads
  db = RailsMultisite::ConnectionManagement.current_db

  puts "Cleaning up uploads and thumbnails for '#{db}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    exit 1
  end

  puts <<~TEXT
    This task will remove upload records and files permanently.

    Would you like to take a full backup before the clean up? (Y/N)
  TEXT

  if STDIN.gets.chomp.downcase == 'y'
    puts "Starting backup..."
    backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
    backuper.run
    exit 1 unless backuper.success
  end

  public_directory = Rails.root.join("public").to_s

  ##
  ## DATABASE vs FILE SYSTEM
  ##

  # uploads & avatars first, then optimized images
  [Upload, OptimizedImage].each do |model|
    model.find_each do |record|
      path = File.join(public_directory, record.url)

      if File.exist?(path)
        putc "."
      else
        record.destroy!
        putc "#"
      end
    end
  end

  ##
  ## FILE SYSTEM vs DATABASE
  ##

  uploads_directory = File.join(public_directory, 'uploads', db).to_s

  # avatars (no avatar should be stored in that old directory)
  FileUtils.rm_rf("#{uploads_directory}/avatars")

  # uploads and optimized images
  Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
    sha1 = Upload.generate_digest(file_path)
    url = file_path.split(public_directory, 2)[1]

    # A file is orphaned when neither model knows it by digest or by url.
    orphaned =
      [Upload, OptimizedImage].all? do |model|
        model.where(sha1: sha1).empty? && model.where(url: url).empty?
      end

    if orphaned
      FileUtils.rm(file_path)
      putc "#"
    else
      putc "."
    end
  end

  puts "Removing empty directories..."
  # argv form: the directory is handed to find as one argument, so spaces or
  # shell metacharacters in the path cannot split or alter the command.
  find_command = ["find", uploads_directory, "-type", "d", "-empty", "-exec", "rmdir", "{}", ";"]
  puts IO.popen(find_command, &:read)

  puts "Done!"
end
|
2015-05-10 20:30:22 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
2019-05-28 14:00:43 -04:00
|
|
|
# missing files #
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
2015-05-10 20:30:22 -04:00
|
|
|
|
|
|
|
# list all missing uploads and optimized images
|
2019-05-28 14:00:43 -04:00
|
|
|
# Lists missing uploads for the RAILS_DB site, or for every site when RAILS_DB
# is not set.
#
# In the all-sites case a site with an external store is skipped when
# SKIP_EXTERNAL=1, and otherwise listed after a prominent warning.
# SKIP_OPTIMIZED is forwarded to list_missing_uploads as skip_optimized.
task "uploads:missing_files" => :environment do
  list_current_site = -> { list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED']) }

  if ENV["RAILS_DB"]
    list_current_site.call
  else
    RailsMultisite::ConnectionManagement.each_connection do |_db|
      if ENV["SKIP_EXTERNAL"] == "1" && Discourse.store.external?
        puts "#{RailsMultisite::ConnectionManagement.current_db} has uploads stored externally skipping!"
      else
        if Discourse.store.external?
          rule = "-" * 80
          puts rule, "WARNING! WARNING! WARNING!", rule
          puts
          puts <<~TEXT
            #{RailsMultisite::ConnectionManagement.current_db} has uploads on S3!
            validating without inventory is likely to take an enormous amount of time.
            We recommend you run SKIP_EXTERNAL=1 rake uploads:missing to skip validating if on a multisite.
          TEXT
        end

        list_current_site.call
      end
    end
  end
end
|
2015-05-10 20:30:22 -04:00
|
|
|
|
2019-02-14 14:04:35 -05:00
|
|
|
# Delegates to the current store's list_missing_uploads, forwarding
# +skip_optimized+ unchanged, and returns whatever the store returns.
def list_missing_uploads(skip_optimized: false)
  store = Discourse.store
  store.list_missing_uploads(skip_optimized: skip_optimized)
end
|
2015-05-11 06:59:50 -04:00
|
|
|
|
2019-05-28 14:00:43 -04:00
|
|
|
# Alias for the uploads:missing_files task.
task "uploads:missing" => :environment do
  missing_files_task = Rake::Task["uploads:missing_files"]
  missing_files_task.invoke
end
|
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
# regenerate_missing_optimized #
|
|
|
|
################################################################################
|
|
|
|
|
2015-05-11 06:59:50 -04:00
|
|
|
# regenerate missing optimized images
|
|
|
|
# Regenerates missing optimized images for the RAILS_DB site, or for every
# site when RAILS_DB is not set.
task "uploads:regenerate_missing_optimized" => :environment do
  if ENV["RAILS_DB"]
    regenerate_missing_optimized
  else
    RailsMultisite::ConnectionManagement.each_connection do
      regenerate_missing_optimized
    end
  end
end
|
|
|
|
|
|
|
|
# Re-creates optimized images whose file is missing or empty on local disk.
#
# Avatar thumbnails are processed first, then all other optimized images that
# have a url and positive dimensions. When the original file is also missing
# and the upload has an origin, the original is downloaded again before the
# thumbnail is resized from it. Progress characters: "." thumbnail already
# present, "#" regenerated, "X" original unavailable. Originals that could not
# be found are listed at the end. Does nothing for external stores.
def regenerate_missing_optimized
  site = RailsMultisite::ConnectionManagement.current_db

  puts "Regenerating missing optimized images for '#{site}'..."

  if Discourse.store.external?
    puts "This task only works for internal storages."
    return
  end

  public_directory = "#{Rails.root}/public"
  missing_uploads = Set.new

  # true when the file exists and is not empty
  on_disk = ->(file) { File.exist?(file) && File.size(file) > 0 }

  avatar_upload_ids = UserAvatar.all.pluck(:custom_upload_id, :gravatar_upload_id).flatten.compact

  with_uploads = OptimizedImage.includes(:upload)

  avatar_thumbnails = with_uploads.where("optimized_images.upload_id IN (?)", avatar_upload_ids)

  other_thumbnails = with_uploads
    .where("optimized_images.upload_id NOT IN (?)", avatar_upload_ids)
    .where("LENGTH(COALESCE(url, '')) > 0")
    .where("width > 0 AND height > 0")

  [avatar_thumbnails, other_thumbnails].each do |scope|
    scope.find_each do |optimized_image|
      upload = optimized_image.upload

      # only urls that start with a single "/" are handled
      next unless optimized_image.url =~ /^\/[^\/]/
      next unless upload.url =~ /^\/[^\/]/

      thumbnail = "#{public_directory}#{optimized_image.url}"
      original = "#{public_directory}#{upload.url}"

      if on_disk.call(thumbnail)
        putc "."
        next
      end

      # make sure the original image exists locally
      if !on_disk.call(original) && upload.origin.present?
        # try to fix it by redownloading it
        begin
          downloaded =
            begin
              FileHelper.download(
                upload.origin,
                max_file_size: SiteSetting.max_image_size_kb.kilobytes,
                tmp_file_name: "discourse-missing",
                follow_redirect: true
              )
            rescue
              nil
            end

          if downloaded && downloaded.size > 0
            FileUtils.mkdir_p(File.dirname(original))
            File.open(original, "wb") { |f| f.write(downloaded.read) }
          end
        ensure
          downloaded.try(:close!) if downloaded.respond_to?(:close!)
        end
      end

      if on_disk.call(original)
        FileUtils.mkdir_p(File.dirname(thumbnail))
        OptimizedImage.resize(original, thumbnail, optimized_image.width, optimized_image.height)
        putc "#"
      else
        missing_uploads << original
        putc "X"
      end
    end
  end

  puts "", "Done"

  unless missing_uploads.empty?
    puts "Missing uploads:"
    missing_uploads.sort.each { |missing| puts missing }
  end
end
|
2015-05-19 06:31:51 -04:00
|
|
|
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
2015-06-12 06:02:36 -04:00
|
|
|
# migrate_to_new_scheme #
|
2015-05-25 11:59:00 -04:00
|
|
|
################################################################################
|
|
|
|
|
2015-06-12 06:02:36 -04:00
|
|
|
# Turns the migrate_to_new_scheme site setting on.
task "uploads:start_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = true
  $stdout.puts("Migration started!")
end
|
|
|
|
|
2015-06-12 06:02:36 -04:00
|
|
|
# Turns the migrate_to_new_scheme site setting off.
task "uploads:stop_migration" => :environment do
  SiteSetting.migrate_to_new_scheme = false
  puts "Migration stopped!"
end
|
2016-09-01 03:19:14 -04:00
|
|
|
|
|
|
|
# Prints a disk-usage report for the current site's uploads.
#
# Arguments:
#   cache_path - optional path to an existing listing with one
#                "<size> <file path>" line per file; when omitted a fresh
#                listing is written to /tmp with `find`.
#   limit      - optional number of users to show in the per-user table
#                (default 10); must be an integer, otherwise ArgumentError
#                is raised.
#
# The report shows record counts, a per-extension size/count table (largest
# first) and the users whose uploads take the most space.
task "uploads:analyze", [:cache_path, :limit] => :environment do |_, args|
  now = Time.zone.now
  current_db = RailsMultisite::ConnectionManagement.current_db

  puts "Analyzing uploads for '#{current_db}'... This may take awhile...\n"
  cache_path = args[:cache_path]

  uploads_path = Rails.root.join('public', 'uploads', current_db)

  path = cache_path
  unless path
    path = "/tmp/#{current_db}-#{now.to_i}-paths.txt"
    # Make sure the listing exists even if find fails to start.
    FileUtils.touch(path)
    # argv form plus an out: redirection: no shell, so neither path needs
    # quoting.
    system("find", uploads_path.to_s, "-type", "f", "-printf", "%s %h/%f\n", out: path)
  end

  extensions = Hash.new { |stats, ext| stats[ext] = { "count" => 0, "size" => 0 } }
  paths_count = 0

  File.foreach(path) do |line|
    size, file_path = line.split(" ", 2)
    # Skip blank or malformed lines instead of crashing on a nil path.
    next if file_path.nil?

    paths_count += 1
    stats = extensions[File.extname(file_path).chomp.downcase]
    stats["count"] += 1
    stats["size"] += size.to_i
  end

  uploads_count = Upload.count
  optimized_images_count = OptimizedImage.count

  puts <<~TEXT
    Report for '#{current_db}'
    -----------#{'-' * current_db.length}
    Number of `Upload` records in DB: #{uploads_count}
    Number of `OptimizedImage` records in DB: #{optimized_images_count}
    **Total DB records: #{uploads_count + optimized_images_count}**

    Number of images in uploads folder: #{paths_count}
    ------------------------------------#{'-' * paths_count.to_s.length}

  TEXT

  helper_class = Class.new do
    include ActionView::Helpers::NumberHelper
  end

  helper = helper_class.new

  printf "%-15s | %-15s | %-15s\n", 'extname', 'total size', 'count'
  puts "-" * 45

  extensions.sort_by { |_, value| value['size'] }.reverse_each do |extname, value|
    printf "%-15s | %-15s | %-15s\n", extname, helper.number_to_human_size(value['size']), value['count']
  end

  puts "\n"

  # Coerce to an Integer so the rake argument cannot inject SQL below.
  limit = Integer(args[:limit] || 10)

  sql = <<~SQL
    SELECT
    users.username,
    COUNT(uploads.user_id) AS num_of_uploads,
    SUM(uploads.filesize) AS total_size_of_uploads,
    COUNT(optimized_images.id) AS num_of_optimized_images
    FROM users
    INNER JOIN uploads ON users.id = uploads.user_id
    INNER JOIN optimized_images ON uploads.id = optimized_images.upload_id
    GROUP BY users.id
    ORDER BY total_size_of_uploads DESC
    LIMIT #{limit}
  SQL

  puts "Users using the most disk space"
  puts "-------------------------------\n"
  printf "%-25s | %-25s | %-25s | %-25s\n", 'username', 'total size of uploads', 'number of uploads', 'number of optimized images'
  puts "-" * 110

  # DB.query yields one object per row with a reader per selected column;
  # query_single would flatten all columns of all rows into one array.
  DB.query(sql).each do |row|
    printf "%-25s | %-25s | %-25s | %-25s\n", row.username, helper.number_to_human_size(row.total_size_of_uploads), row.num_of_uploads, row.num_of_optimized_images
  end

  puts "\n"
  puts "List of file paths @ #{path}"
  puts "Duration: #{Time.zone.now - now} seconds"
end
|
2018-08-08 01:14:52 -04:00
|
|
|
|
|
|
|
# Delegates to UploadFixer.fix_all_extensions.
task("uploads:fix_incorrect_extensions" => :environment) do
  UploadFixer.fix_all_extensions
end
|
2018-09-05 04:54:15 -04:00
|
|
|
|
2019-03-07 08:15:30 -05:00
|
|
|
# Alias for the uploads:recover task.
task "uploads:recover_from_tombstone" => :environment do
  recover_task = Rake::Task["uploads:recover"]
  recover_task.invoke
end
|
|
|
|
|
2018-09-12 04:51:53 -04:00
|
|
|
# Runs UploadRecovery for the RAILS_DB site, or for every site when RAILS_DB
# is not set. DRY_RUN and STOP_ON_ERROR are passed on as booleans that are
# true when the variable is present.
task "uploads:recover" => :environment do
  options = {
    dry_run: ENV["DRY_RUN"].present?,
    stop_on_error: ENV["STOP_ON_ERROR"].present?
  }

  # A fresh UploadRecovery is built for every site.
  recover_current_site = -> { UploadRecovery.new(**options).recover }

  if ENV["RAILS_DB"]
    recover_current_site.call
  else
    RailsMultisite::ConnectionManagement.each_connection do |_db|
      recover_current_site.call
    end
  end
end
|
2019-05-22 01:24:36 -04:00
|
|
|
|
2020-03-02 18:03:58 -05:00
|
|
|
# Enqueues ACL sync jobs for every upload of every site, 100 ids at a time.
# Exits with status 1 as soon as a site without an external store is reached.
task "uploads:sync_s3_acls" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |_db|
    if !Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts(
      "CAUTION: This task may take a long time to complete! There are #{Upload.count} uploads to sync ACLs for.",
      "",
      "-" * 30,
      "Uploads marked as secure will get a private ACL, and uploads marked as not secure will get a public ACL.",
      "Upload ACLs will be updated in Sidekiq jobs in batches of 100 at a time, check Sidekiq queues for SyncAclsForUploads for progress."
    )

    Upload.select(:id).find_in_batches(batch_size: 100) do |batch|
      adjust_acls(batch.map(&:id))
    end

    puts "", "Upload ACL sync complete!"
  end
end
|
|
|
|
|
2020-01-06 21:27:24 -05:00
|
|
|
# Disables secure media on every site and resets post-linked secure uploads.
#
# Per site: turns the secure_media setting off, marks every secure upload that
# is linked to a post as not secure, enqueues ACL sync jobs for those uploads
# and rebakes the posts that reference them. Exits with status 1 as soon as a
# site without an external store is reached.
task "uploads:disable_secure_media" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    if !Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "Disabling secure media and resetting uploads to not secure in #{db}...", ""

    SiteSetting.secure_media = false

    # Count and ids are read before the update below changes what the
    # relation matches.
    relation = Upload.joins(:post_uploads).where(secure: true)
    total = relation.count
    upload_ids = relation.pluck(:id)

    puts "", "Marking #{total} uploads as not secure.", ""
    relation.update_all(
      secure: false,
      security_last_changed_at: Time.zone.now,
      security_last_changed_reason: "marked as not secure by disable_secure_media task"
    )

    post_ids = DB.query_single(
      "SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)", upload_ids
    )
    adjust_acls(upload_ids)
    log_rebake_errors(rebake_upload_posts(post_ids))

    puts "", "Rebaking and uploading complete!", ""
  end

  puts "", "Secure media is now disabled!", ""
end
|
|
|
|
|
2019-11-17 20:25:42 -05:00
|
|
|
##
|
|
|
|
# Run this task whenever the secure_media or login_required
|
|
|
|
# settings are changed for a Discourse instance to update
|
2020-03-02 18:03:58 -05:00
|
|
|
# the upload secure flag and S3 upload ACLs. Any uploads that
|
|
|
|
# have their secure status changed will have all associated posts
|
|
|
|
# rebaked.
|
|
|
|
# For every site: recomputes which uploads are secure inside one transaction,
# then rebakes the affected posts and enqueues ACL sync jobs for the changed
# uploads. Exits with status 1 as soon as a site without an external store is
# reached.
task "uploads:secure_upload_analyse_and_update" => :environment do
  RailsMultisite::ConnectionManagement.each_connection do |db|
    if !Discourse.store.external?
      puts "This task only works for external storage."
      exit 1
    end

    puts "Analyzing security for uploads in #{db}...", ""
    changed_upload_ids = nil
    rebake_post_ids = nil

    Upload.transaction do
      # If secure media is enabled we need to first set the access control post of
      # all post uploads (even uploads that are linked to multiple posts). If the
      # upload is not set to secure media then this has no other effect on the upload,
      # but we _must_ know what the access control post is because the with_secure_media?
      # method is on the post, and this knows about the category security & PM status
      update_uploads_access_control_post if SiteSetting.secure_media?

      puts "", "Analysing which uploads need to be marked secure and be rebaked.", ""

      rebake_post_ids, changed_upload_ids =
        if SiteSetting.login_required?
          # Simply mark all uploads linked to posts secure if login_required because no anons will be able to access them.
          mark_all_as_secure_login_required
        else
          # Otherwise only mark uploads linked to posts in secure categories or PMs as secure.
          update_specific_upload_security_no_login_required
        end
    end

    # Enqueue rebakes AFTER upload transaction complete, so there is no race condition
    # between updating the DB and the rebakes occurring.
    log_rebake_errors(rebake_upload_posts(rebake_post_ids))

    # Also do this AFTER upload transaction complete so we don't end up with any
    # errors leaving ACLs in a bad state (the ACL sync task can be run to fix any
    # outliers at any time).
    adjust_acls(changed_upload_ids)
  end

  puts "", "", "Done!"
end
|
|
|
|
|
2022-05-22 23:14:11 -04:00
|
|
|
# Enqueues :sync_acls_for_uploads jobs for the given upload ids, 100 ids per
# job.
#
# upload_ids_to_adjust_acl_for - Array of upload ids.
#
# A progress message is printed before and after enqueuing only when more
# than one job is needed.
def adjust_acls(upload_ids_to_adjust_acl_for)
  upload_count = upload_ids_to_adjust_acl_for.count
  jobs_to_create = (upload_count.to_f / 100.00).ceil
  batched = jobs_to_create > 1

  if batched
    # Report how many uploads there are, not the full list of ids.
    puts "Adjusting ACLs for #{upload_count} uploads. These will be batched across #{jobs_to_create} sync job(s)."
  end

  upload_ids_to_adjust_acl_for.each_slice(100) do |upload_ids|
    Jobs.enqueue(:sync_acls_for_uploads, upload_ids: upload_ids)
  end

  if batched
    puts "ACL batching complete. Keep an eye on the Sidekiq queue for progress."
  end
end
|
|
|
|
|
2022-05-22 23:14:11 -04:00
|
|
|
# Used when login_required is on: marks every not-yet-secure upload linked to
# a post as secure, then marks as not secure every secure upload whose id was
# not returned by that first update.
#
# Returns a two-element array:
#   [ids of posts referencing the uploads just marked secure,
#    ids of all uploads whose secure flag changed (no duplicates)]
def mark_all_as_secure_login_required
  newly_secure_ids = DB.query_single(<<~SQL)
    WITH upl AS (
    SELECT DISTINCT ON (upload_id) upload_id
    FROM post_uploads
    INNER JOIN posts ON posts.id = post_uploads.post_id
    INNER JOIN topics ON topics.id = posts.topic_id
    )
    UPDATE uploads
    SET secure = true,
    security_last_changed_reason = 'upload security rake task mark as secure',
    security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND NOT uploads.secure
    RETURNING uploads.id
  SQL
  puts "Marked #{newly_secure_ids.count} upload(s) as secure because login_required is true.", ""

  newly_insecure_ids = DB.query_single(<<~SQL, newly_secure_ids)
    UPDATE uploads
    SET secure = false,
    security_last_changed_reason = 'upload security rake task mark as not secure',
    security_last_changed_at = NOW()
    WHERE id NOT IN (?) AND uploads.secure
    RETURNING uploads.id
  SQL
  puts "Marked #{newly_insecure_ids.count} upload(s) as not secure because they are not linked to posts.", ""
  puts "Finished marking upload(s) as secure."

  rebake_post_ids = DB.query_single(
    "SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)", newly_secure_ids
  )

  changed_upload_ids = (newly_secure_ids + newly_insecure_ids).uniq
  [rebake_post_ids, changed_upload_ids]
end
|
|
|
|
|
2020-02-16 23:21:43 -05:00
|
|
|
# Prints the given rebake error messages under a heading.
# Prints nothing when +rebake_errors+ is empty.
def log_rebake_errors(rebake_errors)
  unless rebake_errors.empty?
    puts "The following post rebakes failed with error:", ""
    rebake_errors.each { |message| puts message }
  end
end
|
|
|
|
|
2022-05-22 23:14:11 -04:00
|
|
|
# Bulk-updates upload security based on where each upload is used.
#
# A simplification of the rules found in UploadSecurity which is a lot faster than
# having to loop through records and use that class to check security.
#
# Three UPDATE statements run in order:
#   1. uploads used in a read-restricted category or in a private message are
#      flagged secure (only rows that are not secure yet);
#   2. uploads used in a regular topic that is uncategorized or in a category
#      that is not read-restricted are flagged not secure (only rows that are
#      currently secure);
#   3. every remaining secure upload whose id was not returned by steps 1-2 is
#      flagged not secure.
#
# NOTE(review): steps 1-2 only return the rows they actually changed, so an
# upload that was already secure and sits in a restricted category / PM is not
# in the exclusion list used by step 3 — confirm that unmarking it is intended.
#
# Returns a two-element array:
#   [ids of posts referencing uploads changed by steps 1-2, ids of all uploads changed]
def update_specific_upload_security_no_login_required
  marked_secure_ids = DB.query_single(<<~SQL)
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id
      FROM post_uploads
      INNER JOIN posts ON posts.id = post_uploads.post_id
      INNER JOIN topics ON topics.id = posts.topic_id
      LEFT JOIN categories ON categories.id = topics.category_id
      WHERE (topics.category_id IS NOT NULL AND categories.read_restricted) OR
            (topics.archetype = 'private_message')
    )
    UPDATE uploads
    SET secure = true,
        security_last_changed_reason = 'upload security rake task mark as secure',
        security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND NOT uploads.secure
    RETURNING uploads.id
  SQL
  puts "Marked #{marked_secure_ids.length} uploads as secure."

  # Anything in a public category or a regular topic should not be secure.
  marked_not_secure_ids = DB.query_single(<<~SQL)
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id
      FROM post_uploads
      INNER JOIN posts ON posts.id = post_uploads.post_id
      INNER JOIN topics ON topics.id = posts.topic_id
      LEFT JOIN categories ON categories.id = topics.category_id
      WHERE (topics.archetype = 'regular' AND topics.category_id IS NOT NULL AND NOT categories.read_restricted) OR
            (topics.archetype = 'regular' AND topics.category_id IS NULL)
    )
    UPDATE uploads
    SET secure = false,
        security_last_changed_reason = 'upload security rake task mark as not secure',
        security_last_changed_at = NOW()
    FROM upl
    WHERE uploads.id = upl.upload_id AND uploads.secure
    RETURNING uploads.id
  SQL
  puts "Marked #{marked_not_secure_ids.length} uploads as not secure."

  # Everything else should not be secure!
  upload_ids_changed = marked_secure_ids | marked_not_secure_ids
  remaining_not_secure_ids = DB.query_single(<<~SQL, upload_ids_changed)
    UPDATE uploads
    SET secure = false,
        security_last_changed_reason = 'upload security rake task mark as not secure',
        security_last_changed_at = NOW()
    WHERE id NOT IN (?) AND uploads.secure
    RETURNING uploads.id
  SQL
  puts "Finished updating upload security. Marked #{remaining_not_secure_ids.length} uploads not linked to posts as not secure."

  # The rebake list is built from the uploads changed by the first two
  # statements only, not from the ones changed by the third.
  rebake_sql = "SELECT DISTINCT post_id FROM post_uploads WHERE upload_id IN (?)"
  post_ids_to_rebake = DB.query_single(rebake_sql, upload_ids_changed)

  [post_ids_to_rebake, upload_ids_changed | remaining_not_secure_ids]
end
|
|
|
|
|
2020-02-16 23:21:43 -05:00
|
|
|
# Sets uploads.access_control_post_id for every upload that has at least one
# post_uploads row. The CTE keeps a single row per upload_id, ordered by
# (upload_id, post_id), so the lowest post_id linked to the upload is used.
def update_uploads_access_control_post
  sql = <<~SQL
    WITH upl AS (
      SELECT DISTINCT ON (upload_id) upload_id, post_id FROM post_uploads ORDER BY upload_id, post_id
    )
    UPDATE uploads
    SET access_control_post_id = upl.post_id
    FROM upl
    WHERE uploads.id = upl.upload_id
  SQL

  DB.exec(sql)
end
|
|
|
|
|
2022-05-22 23:14:11 -04:00
|
|
|
# Rebakes every post whose id is in +post_ids_to_rebake+, printing progress
# through RakeHelpers.
#
# A failure while rebaking one post is recorded and the loop carries on with
# the next post, so a single bad post no longer prevents the rest from being
# rebaked.
#
# post_ids_to_rebake - ids passed to Post.where(id: ...).
#
# Returns an array of error message strings (one per failed post, prefixed
# with the post id); empty when every rebake succeeded.
def rebake_upload_posts(post_ids_to_rebake)
  posts_to_rebake = Post.where(id: post_ids_to_rebake)
  post_rebake_errors = []
  total = posts_to_rebake.length

  puts "", "Rebaking #{total} posts with affected uploads.", ""

  posts_to_rebake.each_with_index do |post, index|
    RakeHelpers.print_status_with_label("Rebaking posts.....", index, total)

    begin
      post.rebake!
    rescue => e
      # Keep going: the caller gets the full list of failures at the end.
      post_rebake_errors << "Post #{post.id}: #{e.message}"
    end
  end

  RakeHelpers.print_status_with_label("Rebaking complete! ", total, total)
  puts ""

  post_rebake_errors
end
|
|
|
|
|
2019-05-22 01:24:36 -04:00
|
|
|
# Rewrites parenthesised "/uploads..." URLs in post.raw (markdown link/image
# targets) to the matching upload's short_url.
#
# For each match the upload is looked up, in order, by:
#   1. exact url;
#   2. the sha1 taken from index 2 of Upload.extract_url's result;
#   3. that sha1 mapped through the post's "UPLOAD_SHA1_MAP" custom field (JSON).
# Matches with no upload are reported and left untouched.
#
# When at least one URL was replaced, the previous raw is stored in the
# "BACKUP_POST_RAW" custom field, the post is saved without validation and
# rebaked. Otherwise nothing is persisted.
def inline_uploads(post)
  original_raw = post.raw
  replaced = false

  post.raw = post.raw.gsub(/(\((\/uploads\S+).*\))/) do
    # Copy the captures into locals up front; they stay stable for the block.
    matched_text = $1
    upload_url = $2

    upload = Upload.find_by(url: upload_url)

    unless upload
      url_data = Upload.extract_url(upload_url)
      sha1 = url_data && url_data[2]

      if sha1
        upload = Upload.find_by(sha1: sha1)

        unless upload
          sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
          mapped_sha = sha_map[sha1]
          upload = Upload.find_by(sha1: mapped_sha) if mapped_sha
        end
      end
    end

    if upload&.id
      replaced = true
      matched_text.sub(upload_url, upload.short_url)
    else
      puts "Upload not found #{upload_url} in Post #{post.id} - #{post.url}"
      matched_text
    end
  end

  return unless replaced

  puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
  post.custom_fields["BACKUP_POST_RAW"] = original_raw
  post.save_custom_fields
  post.save!(validate: false)
  post.rebake!
end
|
|
|
|
|
2019-05-23 01:09:16 -04:00
|
|
|
# Replaces HTML <img src="/uploads/..."> tags in post.raw with markdown image
# syntax pointing at the upload's short_url.
#
# For each tag the upload is looked up, in order, by:
#   1. exact url;
#   2. the sha1 taken from index 2 of Upload.extract_url's result;
#   3. a file at <Rails.root>/public/<url>, from which a new upload is created
#      for the post's user.
# Tags whose URL contains ".." are never resolved against the filesystem and
# are kept exactly as written; tags with no upload are reported and kept.
#
# When at least one tag was replaced, the previous raw is stored in the
# "BACKUP_POST_RAW" custom field, the post is saved without validation and
# rebaked. Otherwise nothing is persisted.
def inline_img_tags(post)
  replaced = false
  original_raw = post.raw

  post.raw = post.raw.gsub(/(<img\s+src=["'](\/uploads\/[^'"]*)["'].*>)/i) do
    img_tag = $1
    upload_url = $2

    # A bare `next` would make gsub substitute an empty string and silently
    # delete the tag; hand the original text back so it is left untouched.
    next img_tag if upload_url.include?("..")

    upload = Upload.find_by(url: upload_url)

    if !upload
      data = Upload.extract_url(upload_url)
      sha1 = data && data[2]
      upload = Upload.find_by(sha1: sha1) if sha1
    end

    if !upload
      local_file = File.join(Rails.root, "public", upload_url)
      if File.exist?(local_file)
        File.open(local_file) do |f|
          upload = UploadCreator.new(f, "image").create_for(post.user_id)
        end
      end
    end

    if upload
      replaced = true
      "![image](#{upload.short_url})"
    else
      puts "skipping missing upload in #{post.full_url} #{img_tag}"
      img_tag
    end
  end

  if replaced
    puts "Corrected image urls in #{post.full_url} raw backup stored in custom field"
    post.custom_fields["BACKUP_POST_RAW"] = original_raw
    post.save_custom_fields
    post.save!(validate: false)
    post.rebake!
  end
end
|
|
|
|
|
|
|
|
# Runs both relative-upload-link fixers over the current database:
# posts whose raw contains "](/uploads" go through inline_uploads, and posts
# whose raw contains an <img ... src= ... /uploads/ ...> tag (case-insensitive
# match) go through inline_img_tags.
def fix_relative_links
  Post.where('raw like ?', '%](/uploads%').find_each { |post| inline_uploads(post) }

  Post.where("raw ilike ?", '%<img%src=%/uploads/%>%').find_each { |post| inline_img_tags(post) }
end
|
|
|
|
|
|
|
|
# Fixes relative upload links. When the current database is "default" the fix
# runs for every multisite connection; otherwise only for the current database.
task "uploads:fix_relative_upload_links" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { fix_relative_links }
  else
    fix_relative_links
  end
end
|
2020-08-11 23:32:47 -04:00
|
|
|
|
2020-08-12 18:26:13 -04:00
|
|
|
# Prints a report of uploads whose verification_status is :invalid_etag.
#
# The report lists, per affected post (oldest first), the url and
# base62-sha1 file name of each such upload, followed by totals and by the
# number of rows in other tables that reference the affected upload ids.
# Read-only: nothing is modified.
def analyze_missing_s3
  puts "List of posts with missing images:"

  sql = <<~SQL
    SELECT post_id, url, sha1, extension, uploads.id
    FROM post_uploads pu
    RIGHT JOIN uploads on uploads.id = pu.upload_id
    WHERE verification_status = :invalid_etag
    ORDER BY created_at
  SQL

  invalid_etag = Upload.verification_statuses[:invalid_etag]

  all = []
  DB.query(sql, invalid_etag: invalid_etag).each { |row| all << row }

  # Rows without a post_id come from the RIGHT JOIN: uploads with no post_uploads row.
  linked_rows, other = all.partition(&:post_id)
  lookup = linked_rows.group_by(&:post_id)
  lookup.transform_values! { |rows| rows.map { |row| [row.url, row.sha1, row.extension] } }

  Post.where(id: lookup.keys).order(:created_at).each do |post|
    missing_for_post = lookup[post.id]
    puts "#{Discourse.base_url}/p/#{post.id} #{missing_for_post.length} missing, #{post.created_at}"
    missing_for_post.each do |url, sha1, extension|
      puts url
      puts "#{Upload.base62_sha1(sha1)}.#{extension}"
    end
    puts
  end

  missing_uploads = Upload.where(verification_status: invalid_etag)
  puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
  puts "Total problem posts: #{lookup.size} with #{lookup.values.sum(&:length)} missing uploads"
  puts "Other missing uploads count: #{other.count}"

  return if all.empty?

  ids = all.map(&:id)

  # [table, column] or [table, [column expression, extra WHERE condition]]
  lookups = [
    [:post_uploads, :upload_id],
    [:users, :uploaded_avatar_id],
    [:user_avatars, :gravatar_upload_id],
    [:user_avatars, :custom_upload_id],
    [:site_settings, ["NULLIF(value, '')::integer", "data_type = #{SiteSettings::TypeSupervisor.types[:upload].to_i}"]],
    [:user_profiles, :profile_background_upload_id],
    [:user_profiles, :card_background_upload_id],
    [:categories, :uploaded_logo_id],
    [:categories, :uploaded_background_id],
    [:custom_emojis, :upload_id],
    [:theme_fields, :upload_id],
    [:user_exports, :upload_id],
    [:groups, :flair_upload_id],
  ]

  lookups.each do |table, (column, where)|
    count = DB.query_single(<<~SQL, ids: ids).first
      SELECT COUNT(*) FROM #{table} WHERE #{column} IN (:ids) #{"AND #{where}" if where}
    SQL

    next unless count > 0

    puts "Found #{count} missing row#{"s" if count > 1} in #{table}(#{column})"
  end
end
|
|
|
|
|
2020-08-26 21:49:50 -04:00
|
|
|
# Interactively deletes every upload record whose verification_status is
# :invalid_etag.
#
# Lists the affected uploads (oldest first) and asks the operator to type YES
# on standard input. Any other answer — including end-of-input, e.g. when the
# task runs without a terminal — prints "Aborting" to STDERR and exits the
# process with status 1. Does nothing when there are no such uploads.
def delete_missing_s3
  missing = Upload.where(
    verification_status: Upload.verification_statuses[:invalid_etag]
  ).order(:created_at)

  count = missing.count
  return unless count > 0

  puts "The following uploads will be deleted from the database"
  missing.each do |upload|
    puts "#{upload.id} - #{upload.url} - #{upload.created_at}"
  end
  puts "Please confirm you wish to delete #{count} upload records by typing YES"

  # gets returns nil at EOF; to_s turns that into "" so it counts as "not confirmed"
  # instead of raising NoMethodError.
  confirm = STDIN.gets.to_s.strip

  if confirm == "YES"
    missing.destroy_all
    puts "#{count} records were deleted"
  else
    STDERR.puts "Aborting"
    exit 1
  end
end
|
|
|
|
|
|
|
|
# Deletes upload records flagged as having an invalid etag. When the current
# database is "default" the deletion runs for every multisite connection;
# otherwise only for the current database.
task "uploads:delete_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { delete_missing_s3 }
  else
    delete_missing_s3
  end
end
|
|
|
|
|
2020-08-12 18:26:13 -04:00
|
|
|
# Reports uploads flagged as having an invalid etag. When the current database
# is "default" the report runs for every multisite connection; otherwise only
# for the current database.
task "uploads:analyze_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { analyze_missing_s3 }
  else
    analyze_missing_s3
  end
end
|
|
|
|
|
2020-08-12 18:26:13 -04:00
|
|
|
# Attempts to repair uploads whose verification_status is :invalid_etag.
#
# Phase 1 — for each such upload:
#   * download the file from upload.url, falling back to upload.origin when
#     that fails and an origin is present;
#   * re-create it through UploadCreator inside a transaction that is always
#     rolled back, so only the resulting etag and url are kept;
#   * copy that etag/url onto the original record, reset its verification
#     status to :unchecked, drop its optimized images and rebake the posts
#     that reference it.
#   Failures are printed and the loop moves on to the next upload.
#
# Phase 2 — rebake every post that still references an invalid-etag upload,
# newest post id first. Each post is rebaked once even when it references
# several such uploads.
def fix_missing_s3
  Jobs.run_immediately!

  puts "Attempting to download missing uploads and recreate"
  ids = Upload.where(
    verification_status: Upload.verification_statuses[:invalid_etag]
  ).pluck(:id)

  ids.each do |id|
    upload = Upload.find_by(id: id)
    next if !upload

    tempfile = nil
    downloaded_from = nil

    begin
      tempfile = FileHelper.download(upload.url, max_file_size: 30.megabyte, tmp_file_name: "#{SecureRandom.hex}.#{upload.extension}")
      downloaded_from = upload.url
    rescue => e
      if upload.origin.present?
        begin
          tempfile = FileHelper.download(upload.origin, max_file_size: 30.megabyte, tmp_file_name: "#{SecureRandom.hex}.#{upload.extension}")
          downloaded_from = upload.origin
        rescue => e
          puts "Failed to download #{upload.origin} #{e}"
        end
      else
        puts "Failed to download #{upload.url} #{e}"
      end
    end

    if tempfile
      puts "Successfully downloaded upload id: #{upload.id} - #{downloaded_from} fixing upload"

      fixed_upload = nil
      fix_error = nil
      Upload.transaction do
        begin
          # The sha1 is replaced with a random value before re-creating the file;
          # presumably so UploadCreator does not match the existing record — TODO confirm.
          upload.update_column(:sha1, SecureRandom.hex)
          fixed_upload = UploadCreator.new(tempfile, "temp.#{upload.extension}", skip_validations: true).create_for(Discourse.system_user.id)
        rescue => fix_error
          # invalid extension is the most common issue
        end
        # Always roll back: only fixed_upload's in-memory etag/url are used below.
        raise ActiveRecord::Rollback
      end

      if fix_error
        puts "Failed to fix upload #{fix_error}"
      else
        # we do not fix sha, it may be wrong for arbitrary reasons, if we correct it
        # we may end up breaking posts
        save_error = nil
        begin
          upload.assign_attributes(etag: fixed_upload.etag, url: fixed_upload.url, verification_status: Upload.verification_statuses[:unchecked])
          upload.save!(validate: false)
        rescue => save_error
          # url might be null
        end

        if save_error
          puts "Failed to save upload #{save_error}"
        else
          OptimizedImage.where(upload_id: upload.id).destroy_all
          rebake_ids = PostUpload.where(upload_id: upload.id).pluck(:post_id)

          if rebake_ids.present?
            Post.where(id: rebake_ids).each do |post|
              puts "rebake post #{post.id}"
              post.rebake!
            end
          end
        end
      end
    end
  end

  puts "Attempting to automatically fix problem uploads"
  puts
  puts "Rebaking posts with missing uploads, this can take a while as all rebaking runs inline"

  sql = <<~SQL
    SELECT post_id
    FROM post_uploads pu
    JOIN uploads on uploads.id = pu.upload_id
    WHERE verification_status = :invalid_etag
    ORDER BY post_id DESC
  SQL

  # The join yields one row per (post, upload) pair; uniq keeps the first
  # occurrence of each post id, so the DESC ordering is preserved.
  post_ids = DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).uniq

  post_ids.each do |post_id|
    post = Post.find_by(id: post_id)
    if post
      post.rebake!
      print "."
    else
      puts "Skipping #{post_id} since it is deleted"
    end
  end

  puts
end
|
|
|
|
|
2020-08-12 18:26:13 -04:00
|
|
|
# Repairs uploads flagged as having an invalid etag. When the current database
# is "default" the repair runs for every multisite connection; otherwise only
# for the current database.
task "uploads:fix_missing_s3" => :environment do
  if RailsMultisite::ConnectionManagement.current_db == "default"
    RailsMultisite::ConnectionManagement.each_connection { fix_missing_s3 }
  else
    fix_missing_s3
  end
end
|