# frozen_string_literal: true

# Re-creates uploads referenced from post markup.
#
# For each post, the HTML returned by +PostAnalyzer#cooked_stripped+ is
# scanned for +img+ and +a+ elements. The SHA1 derived from each reference is
# looked up among the files (local store) or object keys (external store)
# under the "original" upload and tombstone locations; every match is fed
# back through +UploadCreator+ and the post is rebaked.
class UploadRecovery
  # @param dry_run [Boolean] when true, only print "<post url> <reference>"
  #   for each candidate instead of recovering it
  # @param stop_on_error [Boolean] when true, re-raise errors hit while
  #   processing a post; otherwise print them and carry on
  def initialize(dry_run: false, stop_on_error: false)
    @dry_run = dry_run
    @stop_on_error = stop_on_error
  end

  # Runs {#recover_post} for every record yielded by
  # +posts.have_uploads.find_each+.
  #
  # @param posts [#have_uploads] class or scope to iterate (defaults to +Post+)
  def recover(posts = Post)
    posts.have_uploads.find_each { |post| recover_post(post) }
  end

  # Inspects the media elements of a single post and recovers (or, in dry-run
  # mode, prints) each candidate upload.
  #
  # * +img+ elements with a +data-orig-src+ attribute are always treated as
  #   candidates, unless one of their DOM classes is listed in
  #   +Post.allowed_image_classes+.
  # * Any other element with an +href+/+src+ is a candidate only when
  #   +Upload.extract_url+ recognises the URL and +Upload.get_from_url+ finds
  #   no upload for it.
  #
  # @param post [Post]
  # @raise [StandardError] only when +stop_on_error+ was requested
  def recover_post(post)
    analyzer = PostAnalyzer.new(post.raw, post.topic_id)

    analyzer.cooked_stripped.css("img", "a").each do |media|
      if media.name == "img" && (orig_src = media["data-orig-src"])
        dom_class = media["class"]
        next if dom_class && (Post.allowed_image_classes & dom_class.split).any?

        if @dry_run
          puts "#{post.full_url} #{orig_src}"
        else
          recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
        end
      elsif (url = media["href"] || media["src"])
        data = Upload.extract_url(url)
        next unless data

        # The third element of the extract_url result is used as the SHA1.
        sha1 = data[2]

        # Nothing to do when an upload already exists for this URL.
        next if Upload.get_from_url(url)

        if @dry_run
          puts "#{post.full_url} #{url}"
        else
          recover_post_upload(post, sha1)
        end
      end
    end
  rescue => e
    raise if @stop_on_error
    puts "#{post.full_url} #{e.class}: #{e.message}"
  end

  private

  # Dispatches recovery of one SHA1 to the external (S3) or local strategy,
  # depending on +Discourse.store.external?+. Does nothing for a SHA1 that
  # fails {#valid_sha1?}.
  def recover_post_upload(post, sha1)
    return unless valid_sha1?(sha1)

    # Explicit keywords: a positional Hash is not converted to keyword
    # arguments on Ruby 3.0+ and would raise ArgumentError here.
    if Discourse.store.external?
      recover_post_upload_from_s3(post: post, sha1: sha1)
    else
      recover_post_upload_from_local(post: post, sha1: sha1)
    end
  end

  # Finishes recovery for one re-created upload: ignores uploads that were
  # not persisted, records a SHA1 mismatch in the post's "UPLOAD_SHA1_MAP"
  # custom field (a JSON object mapping referenced SHA1 => actual SHA1), and
  # rebakes the post.
  def ensure_upload!(post:, sha1:, upload:)
    return unless upload.persisted?

    if upload.sha1 != sha1
      STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"

      sha_map = JSON.parse(post.custom_fields["UPLOAD_SHA1_MAP"] || "{}")
      sha_map[sha1] = upload.sha1

      post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
      post.save_custom_fields
    end

    post.rebake!
  end

  # Recovers +sha1+ from the local store on behalf of +post+'s author.
  def recover_post_upload_from_local(post:, sha1:)
    recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Recovers +sha1+ from the external (S3) store on behalf of +post+'s author.
  def recover_post_upload_from_s3(post:, sha1:)
    recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
      ensure_upload!(post: post, sha1: sha1, upload: upload)
    end
  end

  # Re-creates an upload from every local file whose path contains +sha1+.
  #
  # The list of candidate files (tombstone directory first, then the upload
  # path, both under "original") is globbed once and memoized in +@paths+.
  #
  # @yieldparam upload the object returned by {#create_upload}
  def recover_from_local(sha1:, user_id:)
    @paths ||= begin
      store = Discourse.store

      [store.tombstone_dir, store.upload_path].flat_map do |base_dir|
        Dir.glob(File.join(base_dir, "original", "**", "*.*"))
      end
    end

    @paths.each do |path|
      # Plain substring match: no Regexp is compiled per path, and
      # metacharacters in +sha1+ cannot over-match.
      next unless path.include?(sha1)

      begin
        tmp = Tempfile.new
        # Stream the bytes instead of loading the whole file into memory.
        IO.copy_stream(path, tmp)
        tmp.rewind

        upload = create_upload(tmp, File.basename(path), user_id)
        yield upload if block_given?
      ensure
        # close! also unlinks the temp file rather than leaving it for GC.
        tmp&.close!
      end
    end
  end

  # Re-creates an upload from every S3 object whose key contains +sha1+.
  #
  # Object keys under the "original" and tombstoned "original" prefixes are
  # listed once and memoized in +@object_keys+. A tombstoned object is first
  # copied back to the key without the tombstone prefix, then downloaded and
  # passed to {#create_upload}.
  #
  # @yieldparam upload the object returned by {#create_upload}
  def recover_from_s3(sha1:, user_id:)
    tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX

    @object_keys ||= begin
      s3_helper = Discourse.store.s3_helper

      prefixes =
        if Rails.configuration.multisite
          current_db = RailsMultisite::ConnectionManagement.current_db
          [
            "uploads/#{current_db}/original",
            "uploads/#{tombstone_prefix}#{current_db}/original"
          ]
        else
          ["original", "#{tombstone_prefix}original"]
        end

      prefixes.flat_map { |prefix| s3_helper.list(prefix).map(&:key) }
    end

    @object_keys.each do |object_key|
      next unless object_key.include?(sha1)

      key = object_key

      if object_key.include?(tombstone_prefix)
        key = object_key.sub(tombstone_prefix, "")

        Discourse.store.s3_helper.copy(
          object_key,
          key,
          options: { acl: "public-read" }
        )
      end

      # NOTE(review): presumably absolute_base_url is protocol-relative
      # ("//host/..."), hence the bare "https:" prefix - confirm.
      url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"

      begin
        tmp = FileHelper.download(
          url,
          max_file_size: SiteSetting.max_image_size_kb.kilobytes,
          tmp_file_name: "recover_from_s3"
        )

        if tmp
          upload = create_upload(tmp, File.basename(key), user_id)
          yield upload if block_given?
        end
      ensure
        tmp&.close
      end
    end
  end

  # Builds an upload for +user_id+ from +file+ through +UploadCreator+.
  def create_upload(file, filename, user_id)
    UploadCreator.new(file, filename).create_for(user_id)
  end

  # A SHA1 is usable when it is present and exactly +Upload::SHA1_LENGTH+
  # characters long.
  def valid_sha1?(sha1)
    sha1.present? && sha1.length == Upload::SHA1_LENGTH
  end
end