From 80268357e725ab61da2313577926d252d5e3bc9a Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Thu, 17 Sep 2020 13:35:29 +1000 Subject: [PATCH] DEV: Change upload verified column to be integer (#10643) Per review https://review.discourse.org/t/dev-add-verified-to-uploads-and-fill-in-s3-inventory-10406/14180 Change the verified column for Upload to a verified_status integer column, to avoid having NULL as a weird implicit status. --- app/models/upload.rb | 13 ++++++++ ...1633_change_uploads_verified_to_integer.rb | 21 +++++++++++++ ...dd_index_to_uploads_verification_status.rb | 16 ++++++++++ lib/s3_inventory.rb | 30 +++++++++++++------ lib/tasks/uploads.rake | 18 ++++++----- spec/components/s3_inventory_spec.rb | 10 +++---- 6 files changed, 87 insertions(+), 21 deletions(-) create mode 100644 db/migrate/20200910051633_change_uploads_verified_to_integer.rb create mode 100644 db/migrate/20200911031738_add_index_to_uploads_verification_status.rb diff --git a/app/models/upload.rb b/app/models/upload.rb index 24eb7d9fd30..1656fdb5ec9 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -3,6 +3,10 @@ require "digest/sha1" class Upload < ActiveRecord::Base + self.ignored_columns = [ + "verified" # TODO(2020-12-10): remove + ] + include ActionView::Helpers::NumberHelper include HasUrl @@ -51,6 +55,14 @@ class Upload < ActiveRecord::Base scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) } + def self.verification_statuses + @verification_statuses ||= Enum.new( + unchecked: 1, + verified: 2, + invalid_etag: 3 + ) + end + def to_s self.url end @@ -449,6 +461,7 @@ end # access_control_post_id :bigint # original_sha1 :string # verified :boolean +# verification_status :integer default(1), not null # # Indexes # diff --git a/db/migrate/20200910051633_change_uploads_verified_to_integer.rb b/db/migrate/20200910051633_change_uploads_verified_to_integer.rb new file mode 100644 index 00000000000..67ef689093a --- /dev/null +++ b/db/migrate/20200910051633_change_uploads_verified_to_integer.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0] + def up + add_column :uploads, :verification_status, :integer, null: false, default: 1 + Migration::ColumnDropper.mark_readonly(:uploads, :verified) + + DB.exec( + <<~SQL + UPDATE uploads SET verification_status = CASE WHEN + verified THEN 2 + WHEN NOT verified THEN 3 + END + SQL + ) + end + + def down + remove_column :uploads, :verification_status + end +end diff --git a/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb b/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb new file mode 100644 index 00000000000..6d8637d2b5c --- /dev/null +++ b/db/migrate/20200911031738_add_index_to_uploads_verification_status.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0] + disable_ddl_transaction! + + def up + execute <<~SQL + CREATE INDEX CONCURRENTLY IF NOT EXISTS + idx_uploads_on_verification_status ON uploads USING btree (verification_status) + SQL + end + + def down + execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status" + end +end diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb index 3b1c5641ecf..943ebca9fa4 100644 --- a/lib/s3_inventory.rb +++ b/lib/s3_inventory.rb @@ -71,22 +71,34 @@ class S3Inventory .joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url") .where("inventory2.etag IS NOT NULL").pluck(:id) + # marking as verified/not verified if model == Upload - # marking as verified/not verified - DB.exec(<<~SQL, inventory_date + sql_params = { + inventory_date: inventory_date, + unchecked: Upload.verification_statuses[:unchecked], + invalid_etag: Upload.verification_statuses[:invalid_etag], + verified: Upload.verification_statuses[:verified] + } + DB.exec(<<~SQL, sql_params) UPDATE #{model.table_name} - SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END + SET verification_status = CASE WHEN table_name_alias.etag IS NULL + THEN :invalid_etag + ELSE :verified + END FROM #{model.table_name} AS model_table - LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag + LEFT JOIN #{table_name} AS table_name_alias ON + model_table.etag = table_name_alias.etag WHERE model_table.id = #{model.table_name}.id - AND model_table.updated_at < ? + AND model_table.updated_at < :inventory_date AND ( - model_table.verified IS NULL OR - model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END + model_table.verification_status = :unchecked OR + model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL + THEN :invalid_etag + ELSE :verified + END ) AND model_table.id > #{model::SEEDED_ID_THRESHOLD} - SQL - ) + SQL end if (missing_count = missing_uploads.count) > 0 diff --git a/lib/tasks/uploads.rake b/lib/tasks/uploads.rake index 89e3892875f..2587009e60d 100644 --- a/lib/tasks/uploads.rake +++ b/lib/tasks/uploads.rake @@ -1001,7 +1001,7 @@ def analyze_missing_s3 SELECT post_id, url, sha1, extension, uploads.id FROM post_uploads pu RIGHT JOIN uploads on uploads.id = pu.upload_id - WHERE NOT verified + WHERE verification_status = :invalid_etag ORDER BY created_at SQL @@ -1009,7 +1009,7 @@ def analyze_missing_s3 other = [] all = [] - DB.query(sql).each do |r| + DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r| all << r if r.post_id lookup[r.post_id] ||= [] @@ -1029,7 +1029,7 @@ def analyze_missing_s3 puts end - missing_uploads = Upload.where(verified: false) + missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]) puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}" puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads" puts "Other missing uploads count: #{other.count}" @@ -1067,7 +1067,9 @@ def analyze_missing_s3 end def delete_missing_s3 - missing = Upload.where(verified: false).order(:created_at) + missing = Upload.where( + verification_status: Upload.verification_statuses[:invalid_etag] + ).order(:created_at) count = missing.count if count > 0 puts "The following uploads will be deleted from the database" @@ -1110,7 +1112,9 @@ def fix_missing_s3 Jobs.run_immediately! puts "Attempting to download missing uploads and recreate" - ids = Upload.where(verified: false).pluck(:id) + ids = Upload.where( + verification_status: Upload.verification_statuses[:invalid_etag] + ).pluck(:id) ids.each do |id| upload = Upload.find(id) @@ -1165,11 +1169,11 @@ def fix_missing_s3 SELECT post_id FROM post_uploads pu JOIN uploads on uploads.id = pu.upload_id - WHERE NOT verified + WHERE verification_status = :invalid_etag ORDER BY post_id DESC SQL - DB.query_single(sql).each do |post_id| + DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id| post = Post.find_by(id: post_id) if post post.rebake! diff --git a/spec/components/s3_inventory_spec.rb b/spec/components/s3_inventory_spec.rb index 2f4122d0145..9143afff4a7 100644 --- a/spec/components/s3_inventory_spec.rb +++ b/spec/components/s3_inventory_spec.rb @@ -107,15 +107,15 @@ describe "S3Inventory" do end it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do - expect(Upload.where(verified: nil).count).to eq(12) + expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12) output = capture_stdout do inventory.backfill_etags_and_list_missing end - verified = Upload.pluck(:verified) - expect(Upload.where(verified: true).count).to eq(3) - expect(Upload.where(verified: false).count).to eq(2) - expect(Upload.where(verified: nil).count).to eq(7) + verification_status = Upload.pluck(:verification_status) + expect(Upload.where(verification_status: Upload.verification_statuses[:verified]).count).to eq(3) + expect(Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count).to eq(2) + expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(7) end it "does not affect the updated_at date of uploads" do