FIX: Make clean up upload script a safer task to run.

This commit is contained in:
Guo Xiang Tan 2016-09-02 14:50:13 +08:00
parent cdc1d7e46c
commit e4b75f604c
4 changed files with 101 additions and 75 deletions

View File

@ -1,4 +1,3 @@
require "digest/sha1"
require_dependency "file_helper"
require_dependency "url_helper"
require_dependency "db_helper"
@ -54,7 +53,7 @@ class OptimizedImage < ActiveRecord::Base
if resized
thumbnail = OptimizedImage.create!(
upload_id: upload.id,
sha1: Digest::SHA1.file(temp_path).hexdigest,
sha1: Upload.generate_digest(temp_path),
extension: extension,
width: width,
height: height,
@ -231,7 +230,7 @@ class OptimizedImage < ActiveRecord::Base
end
# compute SHA if missing
if optimized_image.sha1.blank?
optimized_image.sha1 = Digest::SHA1.file(path).hexdigest
optimized_image.sha1 = Upload.generate_digest(path)
end
# optimize if image
ImageOptim.new.optimize_image!(path)

View File

@ -81,6 +81,10 @@ class Upload < ActiveRecord::Base
use
}
# Computes the SHA1 digest of a file's contents.
#
# path - location of the file to hash.
#
# Returns the digest as a hexadecimal String.
def self.generate_digest(path)
  sha1 = Digest::SHA1.new
  sha1.file(path)
  sha1.hexdigest
end
# Returns an XPath expression matching every element whose name() differs
# from all entries of WHITELISTED_SVG_ELEMENTS. The string is built on the
# first call and cached in a class variable for later calls.
def self.svg_whitelist_xpath
  @@svg_whitelist_xpath ||= begin
    conditions = WHITELISTED_SVG_ELEMENTS.map { |element| "name()!='#{element}'" }
    "//*[#{conditions.join(" and ")}]"
  end
end
@ -145,7 +149,7 @@ class Upload < ActiveRecord::Base
end
# compute the sha of the file
sha1 = Digest::SHA1.file(file).hexdigest
sha1 = Upload.generate_digest(file)
# do we already have that upload?
upload = find_by(sha1: sha1)
@ -259,7 +263,7 @@ class Upload < ActiveRecord::Base
end
# compute SHA if missing
if upload.sha1.blank?
upload.sha1 = Digest::SHA1.file(path).hexdigest
upload.sha1 = Upload.generate_digest(path)
end
# optimize if image
if FileHelper.is_image?(File.basename(path))

View File

@ -68,7 +68,7 @@ task "uploads:backfill_shas" => :environment do
Upload.where(sha1: nil).find_each do |u|
begin
path = Discourse.store.path_for(u)
u.sha1 = Digest::SHA1.file(path).hexdigest
u.sha1 = Upload.generate_digest(path)
u.save!
putc "."
rescue => e
@ -238,78 +238,96 @@ end
################################################################################
# Entry point for the upload clean up. When the RAILS_DB environment variable
# is set, clean_up_uploads runs exactly once; otherwise it runs once for every
# connection yielded by RailsMultisite::ConnectionManagement.each_connection.
task "uploads:clean_up" => :environment do
  target_db = ENV["RAILS_DB"]

  if target_db.nil?
    RailsMultisite::ConnectionManagement.each_connection do
      clean_up_uploads
    end
  else
    clean_up_uploads
  end
end
RailsMultisite::ConnectionManagement.each_connection do |db|
puts "Cleaning up uploads and thumbnails for '#{db}'..."
def clean_up_uploads
db = RailsMultisite::ConnectionManagement.current_db
if Discourse.store.external?
puts "This task only works for internal storages."
next
end
public_directory = "#{Rails.root}/public"
##
## DATABASE vs FILE SYSTEM
##
# uploads & avatars
Upload.find_each do |upload|
path = "#{public_directory}#{upload.url}"
if !File.exists?(path)
upload.destroy rescue nil
putc "#"
else
putc "."
end
end
# optimized images
OptimizedImage.find_each do |optimized_image|
path = "#{public_directory}#{optimized_image.url}"
if !File.exists?(path)
optimized_image.destroy rescue nil
putc "#"
else
putc "."
end
end
##
## FILE SYSTEM vs DATABASE
##
uploads_directory = "#{public_directory}/uploads/#{db}"
# avatars (no avatar should be stored in that old directory)
FileUtils.rm_rf("#{uploads_directory}/avatars") rescue nil
# uploads
Dir.glob("#{uploads_directory}/*/*.*").each do |f|
url = "/uploads/#{db}/" << f.split("/uploads/#{db}/")[1]
if !Upload.where(url: url).exists?
FileUtils.rm(f) rescue nil
putc "#"
else
putc "."
end
end
# optimized images
Dir.glob("#{uploads_directory}/_optimized/*/*/*.*").each do |f|
url = "/uploads/#{db}/_optimized/" << f.split("/uploads/#{db}/_optimized/")[1]
if !OptimizedImage.where(url: url).exists?
FileUtils.rm(f) rescue nil
putc "#"
else
putc "."
end
end
puts
# Body of clean_up_uploads: reconciles the Upload / OptimizedImage records of
# the current site (`db`) with the files stored under public/uploads/<db>.
# Records without a file are destroyed, files without a record are deleted.
puts "Cleaning up uploads and thumbnails for '#{db}'..."

# The comparison below walks the local file system, so external stores are
# rejected up front.
if Discourse.store.external?
  puts "This task only works for internal storages."
  exit 1
end

puts <<~OUTPUT
  This task will remove upload records and files permanently.
  Would you like to take a full backup before the clean up? (Y/N)
OUTPUT

# STDIN.gets returns nil on EOF (closed or non-interactive stdin). Abort with
# a message rather than failing with NoMethodError on nil.
answer = STDIN.gets
if answer.nil?
  puts "Could not read an answer from STDIN, aborting."
  exit 1
end

if answer.chomp.downcase == 'y'
  puts "Starting backup..."
  backuper = BackupRestore::Backuper.new(Discourse.system_user.id)
  backuper.run
  # Never delete anything when the requested backup did not succeed.
  exit 1 unless backuper.success
end

public_directory = Rails.root.join("public").to_s

##
## DATABASE vs FILE SYSTEM
##

# uploads & avatars: destroy records whose file is missing on disk
Upload.find_each do |upload|
  path = File.join(public_directory, upload.url)

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if !File.exist?(path)
    upload.destroy!
    putc "#"
  else
    putc "."
  end
end

# optimized images: same check for thumbnails
OptimizedImage.find_each do |optimized_image|
  path = File.join(public_directory, optimized_image.url)

  if !File.exist?(path)
    optimized_image.destroy!
    putc "#"
  else
    putc "."
  end
end

##
## FILE SYSTEM vs DATABASE
##

uploads_directory = File.join(public_directory, 'uploads', db)

# avatars (no avatar should be stored in that old directory)
FileUtils.rm_rf("#{uploads_directory}/avatars")

# uploads and optimized images: delete files no record refers to
Dir.glob("#{uploads_directory}/**/*.*").each do |file_path|
  # The glob also matches directories whose name contains a dot; those can
  # neither be hashed nor removed with FileUtils.rm, so skip them.
  next unless File.file?(file_path)

  sha1 = Upload.generate_digest(file_path)
  # URL as stored in the database: the path relative to the public directory.
  url = file_path.split(public_directory, 2)[1]

  # A file is kept when any record matches it by content (sha1) or by url.
  orphaned =
    Upload.where(sha1: sha1).empty? &&
    Upload.where(url: url).empty? &&
    OptimizedImage.where(sha1: sha1).empty? &&
    OptimizedImage.where(url: url).empty?

  if orphaned
    FileUtils.rm(file_path)
    putc "#"
  else
    putc "."
  end
end

puts "Removing empty directories..."
# Array form of IO.popen runs find without a shell, so a path containing
# spaces or shell metacharacters is passed through as a single argument.
find_command = ["find", uploads_directory, "-type", "d", "-empty", "-exec", "rmdir", "{}", ";"]
puts IO.popen(find_command, &:read)

puts "Done!"
end
################################################################################

View File

@ -1,5 +1,4 @@
require 'rails_helper'
require 'digest/sha1'
describe Upload do
@ -12,7 +11,7 @@ describe Upload do
let(:image_filename) { "logo.png" }
let(:image) { file_from_fixtures(image_filename) }
let(:image_filesize) { File.size(image) }
let(:image_sha1) { Digest::SHA1.file(image).hexdigest }
let(:image_sha1) { Upload.generate_digest(image) }
let(:image_svg_filename) { "image.svg" }
let(:image_svg) { file_from_fixtures(image_svg_filename) }
@ -138,4 +137,10 @@ describe Upload do
end
# Pins Upload.generate_digest to the expected SHA1 of the image fixture.
describe '.generate_digest' do
  it "should return the right digest" do
    digest = Upload.generate_digest(image.path)

    expect(digest).to eq('bc975735dfc6409c1c2aa5ebf2239949bcbdbd65')
  end
end
end