# frozen_string_literal: true

# Recovers uploads referenced by posts whose backing files have gone missing,
# by searching the local filesystem (including the tombstone directory) or the
# S3 store (including the tombstone prefix) for a file matching the upload's
# SHA1, then re-creating the upload record and rebaking the post.
#
# NOTE(review): relies on project classes (Post, Upload, PostAnalyzer,
# Discourse, FileStore::S3Store, SiteSetting, UploadCreator) — behavior of
# those collaborators is assumed, not shown here.
class UploadRecovery
  # @param dry_run [Boolean] when true, only print what would be recovered
  #   (and print errors instead of raising).
  def initialize(dry_run: false)
    @dry_run = dry_run
  end

  # Scan posts that have uploads and attempt to recover any missing ones.
  #
  # @param posts [ActiveRecord::Relation] scope to search; defaults to all
  #   posts (Post). Must respond to +have_uploads+.
  def recover(posts = Post)
    posts.have_uploads.find_each do |post|
      begin
        analyzer = PostAnalyzer.new(post.raw, post.topic_id)

        analyzer.cooked_stripped.css("img", "a").each do |media|
          if media.name == "img" && orig_src = media["data-orig-src"]
            # Skip images whose classes are whitelisted (e.g. emoji,
            # avatars) — those are not user uploads to recover.
            if dom_class = media["class"]
              if (Post.white_listed_image_classes & dom_class.split).count > 0
                next
              end
            end

            if @dry_run
              puts "#{post.full_url} #{orig_src}"
            else
              recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
            end
          elsif url = (media["href"] || media["src"])
            data = Upload.extract_url(url)
            next unless data

            # extract_url returns match data; index 2 is the SHA1 portion.
            sha1 = data[2]

            # Only attempt recovery when no upload record resolves for the URL.
            unless Upload.get_from_url(url)
              if @dry_run
                puts "#{post.full_url} #{url}"
              else
                recover_post_upload(post, sha1)
              end
            end
          end
        end
      rescue => e
        # In dry-run mode report and continue; otherwise fail loudly.
        raise e unless @dry_run
        puts "#{post.full_url} #{e.class}: #{e.message}"
      end
    end
  end

  private

  # Dispatch recovery for a single (post, sha1) pair to the store-appropriate
  # strategy. No-op when the sha1 is missing or malformed.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    attributes = {
      post: post,
      sha1: sha1
    }

    if Discourse.store.external?
      recover_post_upload_from_s3(attributes)
    else
      recover_post_upload_from_local(attributes)
    end
  end

  # Recover from the local filesystem, then rebake the post if the upload
  # record was successfully persisted.
  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end

  # Recover from the S3 store, then rebake the post if the upload record was
  # successfully persisted.
  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      post.rebake! if upload.persisted?
    end
  end

  # Search public/uploads (tombstone first, then live originals) for files
  # whose path contains the given sha1, re-create an upload for each match,
  # and yield the created upload.
  #
  # The glob results are memoized in @paths so repeated calls during one run
  # scan the disk only once.
  def recover_from_local(sha1:, user_id:)
    public_path = Rails.root.join("public")

    @paths ||= begin
      Dir.glob(File.join(
        public_path,
        'uploads',
        'tombstone',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )).concat(Dir.glob(File.join(
        public_path,
        'uploads',
        RailsMultisite::ConnectionManagement.current_db,
        'original',
        '**',
        '*.*'
      )))
    end

    @paths.each do |path|
      if path =~ /#{sha1}/
        begin
          # Copy the file into a Tempfile so UploadCreator can consume it.
          tmp = Tempfile.new
          tmp.write(File.read(path))
          tmp.rewind

          upload = create_upload(tmp, File.basename(path), user_id)
          yield upload if block_given?
        ensure
          tmp&.close
        end
      end
    end
  end

  # Search the S3 bucket (live originals plus the tombstone prefix) for keys
  # containing the given sha1. Tombstoned objects are first copied back to
  # their live key, then the file is downloaded and an upload re-created.
  #
  # Object keys are memoized in @object_keys so the bucket is listed only
  # once per run.
  def recover_from_s3(sha1:, user_id:)
    @object_keys ||= begin
      s3_helper = Discourse.store.s3_helper

      s3_helper.list("original").map(&:key).concat(
        s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
      )
    end

    @object_keys.each do |key|
      if key =~ /#{sha1}/
        tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

        if key.include?(tombstone_prefix)
          # Restore the tombstoned object to its original (live) key.
          old_key = key
          key = key.sub(tombstone_prefix, "")

          Discourse.store.s3_helper.copy(
            old_key,
            key,
            options: { acl: "public-read" }
          )
        end

        url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

        begin
          tmp = FileHelper.download(
            url,
            max_file_size: SiteSetting.max_image_size_kb.kilobytes,
            tmp_file_name: "recover_from_s3"
          )

          # download may return nil (e.g. file too large) — only proceed
          # when we actually have a file.
          if tmp
            upload = create_upload(tmp, File.basename(key), user_id)
            yield upload if block_given?
          end
        ensure
          tmp&.close
        end
      end
    end
  end

  # Create an Upload record for +file+ owned by +user_id+.
  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # A sha1 is valid when present and exactly the canonical SHA1 hex length.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end