FIX: don't use etags for post-upload verification (#21923)

They don't work for server-side encryption with customer keys, and so instead we just use Content-MD5 to ensure there was no corruption in transit, which is the best we can do.

See also: https://meta.discourse.org/t/s3-uploads-incompatible-with-server-side-encryption/266853
This commit is contained in:
Matt Palmer 2023-07-07 17:53:49 +10:00 committed by GitHub
parent ba53df5254
commit bd9c919e06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 15 deletions

View File

@ -9,17 +9,11 @@ module FileStore
MISSING_UPLOADS_RAKE_TASK_NAME ||= "posts:missing_uploads" MISSING_UPLOADS_RAKE_TASK_NAME ||= "posts:missing_uploads"
UPLOAD_CONCURRENCY ||= 20 UPLOAD_CONCURRENCY ||= 20
def initialize( def initialize(s3_options:, dry_run: false, migrate_to_multisite: false)
s3_options:,
dry_run: false,
migrate_to_multisite: false,
skip_etag_verify: false
)
@s3_bucket = s3_options[:bucket] @s3_bucket = s3_options[:bucket]
@s3_client_options = s3_options[:client_options] @s3_client_options = s3_options[:client_options]
@dry_run = dry_run @dry_run = dry_run
@migrate_to_multisite = migrate_to_multisite @migrate_to_multisite = migrate_to_multisite
@skip_etag_verify = skip_etag_verify
@current_db = RailsMultisite::ConnectionManagement.current_db @current_db = RailsMultisite::ConnectionManagement.current_db
end end
@ -217,7 +211,7 @@ module FileStore
UPLOAD_CONCURRENCY.times.map do UPLOAD_CONCURRENCY.times.map do
Thread.new do Thread.new do
while obj = queue.pop while obj = queue.pop
if s3.put_object(obj[:options]).etag[obj[:etag]] if s3.put_object(obj[:options])
putc "." putc "."
lock.synchronize { synced += 1 } lock.synchronize { synced += 1 }
else else
@ -231,13 +225,14 @@ module FileStore
local_files.each do |file| local_files.each do |file|
path = File.join(public_directory, file) path = File.join(public_directory, file)
name = File.basename(path) name = File.basename(path)
etag = Digest::MD5.file(path).hexdigest unless @skip_etag_verify content_md5 = Digest::MD5.file(path).base64digest
key = file[file.index(prefix)..-1] key = file[file.index(prefix)..-1]
key.prepend(folder) if bucket_has_folder_path key.prepend(folder) if bucket_has_folder_path
original_path = file.sub("uploads/#{@current_db}", "") original_path = file.sub("uploads/#{@current_db}", "")
if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) } if (s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }) &&
next if File.size(path) == s3_object.size && (@skip_etag_verify || s3_object.etag[etag]) File.size(path) == s3_object.size
next
end end
options = { options = {
@ -245,6 +240,7 @@ module FileStore
body: File.open(path, "rb"), body: File.open(path, "rb"),
bucket: bucket, bucket: bucket,
content_type: MiniMime.lookup_by_filename(name)&.content_type, content_type: MiniMime.lookup_by_filename(name)&.content_type,
content_md5: content_md5,
key: key, key: key,
} }
@ -267,13 +263,11 @@ module FileStore
) )
end end
etag ||= Digest::MD5.file(path).hexdigest
if @dry_run if @dry_run
log "#{file} => #{options[:key]}" log "#{file} => #{options[:key]}"
synced += 1 synced += 1
else else
queue << { path: path, options: options, etag: etag } queue << { path: path, options: options, content_md5: content_md5 }
end end
end end

View File

@ -119,7 +119,6 @@ def create_migration
s3_options: FileStore::ToS3Migration.s3_options_from_env, s3_options: FileStore::ToS3Migration.s3_options_from_env,
dry_run: !!ENV["DRY_RUN"], dry_run: !!ENV["DRY_RUN"],
migrate_to_multisite: !!ENV["MIGRATE_TO_MULTISITE"], migrate_to_multisite: !!ENV["MIGRATE_TO_MULTISITE"],
skip_etag_verify: !!ENV["SKIP_ETAG_VERIFY"],
) )
end end