From 0472bd4adc527f26add0421b780c4e2e0e704eb9 Mon Sep 17 00:00:00 2001 From: Vinoth Kannan Date: Fri, 15 Feb 2019 00:34:35 +0530 Subject: [PATCH] FIX: Remove 'backfill_etags' keyword argument from 'uploads:missing' rake task And etags backfilling code is optimized --- lib/file_store/s3_store.rb | 6 +++--- lib/s3_inventory.rb | 14 +++++++------- lib/tasks/uploads.rake | 8 ++++---- spec/components/s3_inventory_spec.rb | 10 ++++++++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index e51ea23e435..d4bb6647d29 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -123,11 +123,11 @@ module FileStore SiteSetting.Upload.s3_upload_bucket.downcase end - def list_missing_uploads(skip_optimized: false, backfill_etags: false) + def list_missing_uploads(skip_optimized: false) if SiteSetting.enable_s3_inventory require 's3_inventory' - S3Inventory.new(s3_helper, :upload).list_missing(backfill_etags: backfill_etags) - S3Inventory.new(s3_helper, :optimized).list_missing(backfill_etags: backfill_etags) unless skip_optimized + S3Inventory.new(s3_helper, :upload).list_missing + S3Inventory.new(s3_helper, :optimized).list_missing unless skip_optimized else list_missing(Upload, "original/") list_missing(OptimizedImage, "optimized/") unless skip_optimized diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb index 8acb9b2051d..1625e4309eb 100644 --- a/lib/s3_inventory.rb +++ b/lib/s3_inventory.rb @@ -24,7 +24,7 @@ class S3Inventory end end - def list_missing(backfill_etags: false) + def list_missing if files.blank? error("Failed to list inventory from S3") return @@ -46,12 +46,12 @@ class S3Inventory end end - if backfill_etags - uploads = model.where(etag: nil).joins("INNER JOIN #{table_name} ON #{model.table_name}.url ILIKE '%' || #{table_name}.key") - uploads.select(:id, :"#{table_name}.etag").find_each do |upload| - model.where(id: upload.id).update_all(etag: upload.etag) - end - end + # backfilling etags + connection.async_exec("UPDATE #{model.table_name} + SET etag = #{table_name}.etag + FROM #{table_name} + WHERE #{model.table_name}.etag IS NULL + AND url ILIKE '%' || #{table_name}.key") uploads = (model == Upload) ? model.where("created_at < ?", inventory_date) : model missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL") diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index ed11824becc..40b33e81e1c 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -482,16 +482,16 @@ end # list all missing uploads and optimized images task "uploads:missing" => :environment do if ENV["RAILS_DB"] - list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS']) + list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED']) else RailsMultisite::ConnectionManagement.each_connection do |db| - list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED'], backfill_etags: ENV['BACKFILL_ETAGS']) + list_missing_uploads(skip_optimized: ENV['SKIP_OPTIMIZED']) end end end -def list_missing_uploads(skip_optimized: false, backfill_etags: false) - Discourse.store.list_missing_uploads(skip_optimized: skip_optimized, backfill_etags: backfill_etags) +def list_missing_uploads(skip_optimized: false) + Discourse.store.list_missing_uploads(skip_optimized: skip_optimized) end ################################################################################ diff --git a/spec/components/s3_inventory_spec.rb b/spec/components/s3_inventory_spec.rb index 26e77725819..03d70aea070 100644 --- a/spec/components/s3_inventory_spec.rb +++ b/spec/components/s3_inventory_spec.rb @@ -77,14 +77,20 @@ describe "S3Inventory" do end it "should backfill etags to uploads table correctly" do - Fabricate(:upload, url: "//bucket.amazonaws.com/original/0184537a4f419224404d013414e913a4f56018f2.jpg", created_at: 2.days.ago) + files = [ + ["//bucket.amazonaws.com/original/0184537a4f419224404d013414e913a4f56018f2.jpg", "defcaac0b4aca535c284e95f30d608d0"], + ["//bucket.amazonaws.com/original/0789fbf5490babc68326b9cec90eeb0d6590db05.png", "25c02eaceef4cb779fc17030d33f7f06"] + ] + files.each { |file| Fabricate(:upload, url: file[0]) } inventory.expects(:download_inventory_files_to_tmp_directory) inventory.expects(:decompress_inventory_files) inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(2) output = capture_stdout do - expect { inventory.list_missing(backfill_etags: true) }.to change { Upload.where(etag: nil).count }.by(-1) + expect { inventory.list_missing }.to change { Upload.where(etag: nil).count }.by(-2) end + + expect(Upload.order(:url).pluck(:url, :etag)).to eq(files) end end