DEV: Change upload verified column to be integer (#10643)
Per review https://review.discourse.org/t/dev-add-verified-to-uploads-and-fill-in-s3-inventory-10406/14180 Change the verified column for Upload to a verified_status integer column, to avoid having NULL as a weird implicit status.
This commit is contained in:
parent
e313aa5a6e
commit
80268357e7
|
@ -3,6 +3,10 @@
|
|||
require "digest/sha1"
|
||||
|
||||
class Upload < ActiveRecord::Base
|
||||
self.ignored_columns = [
|
||||
"verified" # TODO(2020-12-10): remove
|
||||
]
|
||||
|
||||
include ActionView::Helpers::NumberHelper
|
||||
include HasUrl
|
||||
|
||||
|
@ -51,6 +55,14 @@ class Upload < ActiveRecord::Base
|
|||
|
||||
scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) }
|
||||
|
||||
def self.verification_statuses
|
||||
@verification_statuses ||= Enum.new(
|
||||
unchecked: 1,
|
||||
verified: 2,
|
||||
invalid_etag: 3
|
||||
)
|
||||
end
|
||||
|
||||
def to_s
|
||||
self.url
|
||||
end
|
||||
|
@ -449,6 +461,7 @@ end
|
|||
# access_control_post_id :bigint
|
||||
# original_sha1 :string
|
||||
# verified :boolean
|
||||
# verification_status :integer default(1), not null
|
||||
#
|
||||
# Indexes
|
||||
#
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0]
|
||||
def up
|
||||
add_column :uploads, :verification_status, :integer, null: false, default: 1
|
||||
Migration::ColumnDropper.mark_readonly(:uploads, :verified)
|
||||
|
||||
DB.exec(
|
||||
<<~SQL
|
||||
UPDATE uploads SET verification_status = CASE WHEN
|
||||
verified THEN 2
|
||||
WHEN NOT verified THEN 3
|
||||
END
|
||||
SQL
|
||||
)
|
||||
end
|
||||
|
||||
def down
|
||||
remove_column :uploads, :verification_status
|
||||
end
|
||||
end
|
|
@ -0,0 +1,16 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0]
|
||||
disable_ddl_transaction!
|
||||
|
||||
def up
|
||||
execute <<~SQL
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS
|
||||
idx_uploads_on_verification_status ON uploads USING btree (verification_status)
|
||||
SQL
|
||||
end
|
||||
|
||||
def down
|
||||
execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status"
|
||||
end
|
||||
end
|
|
@ -71,22 +71,34 @@ class S3Inventory
|
|||
.joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
|
||||
.where("inventory2.etag IS NOT NULL").pluck(:id)
|
||||
|
||||
# marking as verified/not verified
|
||||
if model == Upload
|
||||
# marking as verified/not verified
|
||||
DB.exec(<<~SQL, inventory_date
|
||||
sql_params = {
|
||||
inventory_date: inventory_date,
|
||||
unchecked: Upload.verification_statuses[:unchecked],
|
||||
invalid_etag: Upload.verification_statuses[:invalid_etag],
|
||||
verified: Upload.verification_statuses[:verified]
|
||||
}
|
||||
DB.exec(<<~SQL, sql_params)
|
||||
UPDATE #{model.table_name}
|
||||
SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END
|
||||
SET verification_status = CASE WHEN table_name_alias.etag IS NULL
|
||||
THEN :invalid_etag
|
||||
ELSE :verified
|
||||
END
|
||||
FROM #{model.table_name} AS model_table
|
||||
LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag
|
||||
LEFT JOIN #{table_name} AS table_name_alias ON
|
||||
model_table.etag = table_name_alias.etag
|
||||
WHERE model_table.id = #{model.table_name}.id
|
||||
AND model_table.updated_at < ?
|
||||
AND model_table.updated_at < :inventory_date
|
||||
AND (
|
||||
model_table.verified IS NULL OR
|
||||
model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END
|
||||
model_table.verification_status = :unchecked OR
|
||||
model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
|
||||
THEN :invalid_etag
|
||||
ELSE :verified
|
||||
END
|
||||
)
|
||||
AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
|
||||
SQL
|
||||
)
|
||||
SQL
|
||||
end
|
||||
|
||||
if (missing_count = missing_uploads.count) > 0
|
||||
|
|
|
@ -1001,7 +1001,7 @@ def analyze_missing_s3
|
|||
SELECT post_id, url, sha1, extension, uploads.id
|
||||
FROM post_uploads pu
|
||||
RIGHT JOIN uploads on uploads.id = pu.upload_id
|
||||
WHERE NOT verified
|
||||
WHERE verification_status = :invalid_etag
|
||||
ORDER BY created_at
|
||||
SQL
|
||||
|
||||
|
@ -1009,7 +1009,7 @@ def analyze_missing_s3
|
|||
other = []
|
||||
all = []
|
||||
|
||||
DB.query(sql).each do |r|
|
||||
DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
|
||||
all << r
|
||||
if r.post_id
|
||||
lookup[r.post_id] ||= []
|
||||
|
@ -1029,7 +1029,7 @@ def analyze_missing_s3
|
|||
puts
|
||||
end
|
||||
|
||||
missing_uploads = Upload.where(verified: false)
|
||||
missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
|
||||
puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
|
||||
puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads"
|
||||
puts "Other missing uploads count: #{other.count}"
|
||||
|
@ -1067,7 +1067,9 @@ def analyze_missing_s3
|
|||
end
|
||||
|
||||
def delete_missing_s3
|
||||
missing = Upload.where(verified: false).order(:created_at)
|
||||
missing = Upload.where(
|
||||
verification_status: Upload.verification_statuses[:invalid_etag]
|
||||
).order(:created_at)
|
||||
count = missing.count
|
||||
if count > 0
|
||||
puts "The following uploads will be deleted from the database"
|
||||
|
@ -1110,7 +1112,9 @@ def fix_missing_s3
|
|||
Jobs.run_immediately!
|
||||
|
||||
puts "Attempting to download missing uploads and recreate"
|
||||
ids = Upload.where(verified: false).pluck(:id)
|
||||
ids = Upload.where(
|
||||
verification_status: Upload.verification_statuses[:invalid_etag]
|
||||
).pluck(:id)
|
||||
ids.each do |id|
|
||||
upload = Upload.find(id)
|
||||
|
||||
|
@ -1165,11 +1169,11 @@ def fix_missing_s3
|
|||
SELECT post_id
|
||||
FROM post_uploads pu
|
||||
JOIN uploads on uploads.id = pu.upload_id
|
||||
WHERE NOT verified
|
||||
WHERE verification_status = :invalid_etag
|
||||
ORDER BY post_id DESC
|
||||
SQL
|
||||
|
||||
DB.query_single(sql).each do |post_id|
|
||||
DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
|
||||
post = Post.find_by(id: post_id)
|
||||
if post
|
||||
post.rebake!
|
||||
|
|
|
@ -107,15 +107,15 @@ describe "S3Inventory" do
|
|||
end
|
||||
|
||||
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
|
||||
expect(Upload.where(verified: nil).count).to eq(12)
|
||||
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12)
|
||||
output = capture_stdout do
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
|
||||
verified = Upload.pluck(:verified)
|
||||
expect(Upload.where(verified: true).count).to eq(3)
|
||||
expect(Upload.where(verified: false).count).to eq(2)
|
||||
expect(Upload.where(verified: nil).count).to eq(7)
|
||||
verification_status = Upload.pluck(:verification_status)
|
||||
expect(Upload.where(verification_status: Upload.verification_statuses[:verified]).count).to eq(3)
|
||||
expect(Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count).to eq(2)
|
||||
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(7)
|
||||
end
|
||||
|
||||
it "does not affect the updated_at date of uploads" do
|
||||
|
|
Loading…
Reference in New Issue