DEV: Change upload verified column to be integer (#10643)

Per review https://review.discourse.org/t/dev-add-verified-to-uploads-and-fill-in-s3-inventory-10406/14180

Change the verified column for Upload to a verified_status integer column, to avoid having NULL as a weird implicit status.
This commit is contained in:
Martin Brennan 2020-09-17 13:35:29 +10:00 committed by GitHub
parent e313aa5a6e
commit 80268357e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 87 additions and 21 deletions

View File

@ -3,6 +3,10 @@
require "digest/sha1"
class Upload < ActiveRecord::Base
self.ignored_columns = [
"verified" # TODO(2020-12-10): remove
]
include ActionView::Helpers::NumberHelper
include HasUrl
@ -51,6 +55,14 @@ class Upload < ActiveRecord::Base
scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) }
def self.verification_statuses
@verification_statuses ||= Enum.new(
unchecked: 1,
verified: 2,
invalid_etag: 3
)
end
def to_s
self.url
end
@ -449,6 +461,7 @@ end
# access_control_post_id :bigint
# original_sha1 :string
# verified :boolean
# verification_status :integer default(1), not null
#
# Indexes
#

View File

@ -0,0 +1,21 @@
# frozen_string_literal: true
class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0]
def up
add_column :uploads, :verification_status, :integer, null: false, default: 1
Migration::ColumnDropper.mark_readonly(:uploads, :verified)
DB.exec(
<<~SQL
UPDATE uploads SET verification_status = CASE WHEN
verified THEN 2
WHEN NOT verified THEN 3
END
SQL
)
end
def down
remove_column :uploads, :verification_status
end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0]
disable_ddl_transaction!
def up
execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
idx_uploads_on_verification_status ON uploads USING btree (verification_status)
SQL
end
def down
execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status"
end
end

View File

@ -71,22 +71,34 @@ class S3Inventory
.joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
.where("inventory2.etag IS NOT NULL").pluck(:id)
# marking as verified/not verified
if model == Upload
# marking as verified/not verified
DB.exec(<<~SQL, inventory_date
sql_params = {
inventory_date: inventory_date,
unchecked: Upload.verification_statuses[:unchecked],
invalid_etag: Upload.verification_statuses[:invalid_etag],
verified: Upload.verification_statuses[:verified]
}
DB.exec(<<~SQL, sql_params)
UPDATE #{model.table_name}
SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END
SET verification_status = CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
FROM #{model.table_name} AS model_table
LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag
LEFT JOIN #{table_name} AS table_name_alias ON
model_table.etag = table_name_alias.etag
WHERE model_table.id = #{model.table_name}.id
AND model_table.updated_at < ?
AND model_table.updated_at < :inventory_date
AND (
model_table.verified IS NULL OR
model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END
model_table.verification_status = :unchecked OR
model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
)
AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
SQL
)
SQL
end
if (missing_count = missing_uploads.count) > 0

View File

@ -1001,7 +1001,7 @@ def analyze_missing_s3
SELECT post_id, url, sha1, extension, uploads.id
FROM post_uploads pu
RIGHT JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified
WHERE verification_status = :invalid_etag
ORDER BY created_at
SQL
@ -1009,7 +1009,7 @@ def analyze_missing_s3
other = []
all = []
DB.query(sql).each do |r|
DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
all << r
if r.post_id
lookup[r.post_id] ||= []
@ -1029,7 +1029,7 @@ def analyze_missing_s3
puts
end
missing_uploads = Upload.where(verified: false)
missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads"
puts "Other missing uploads count: #{other.count}"
@ -1067,7 +1067,9 @@ def analyze_missing_s3
end
def delete_missing_s3
missing = Upload.where(verified: false).order(:created_at)
missing = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).order(:created_at)
count = missing.count
if count > 0
puts "The following uploads will be deleted from the database"
@ -1110,7 +1112,9 @@ def fix_missing_s3
Jobs.run_immediately!
puts "Attempting to download missing uploads and recreate"
ids = Upload.where(verified: false).pluck(:id)
ids = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).pluck(:id)
ids.each do |id|
upload = Upload.find(id)
@ -1165,11 +1169,11 @@ def fix_missing_s3
SELECT post_id
FROM post_uploads pu
JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified
WHERE verification_status = :invalid_etag
ORDER BY post_id DESC
SQL
DB.query_single(sql).each do |post_id|
DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
post = Post.find_by(id: post_id)
if post
post.rebake!

View File

@ -107,15 +107,15 @@ describe "S3Inventory" do
end
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
expect(Upload.where(verified: nil).count).to eq(12)
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12)
output = capture_stdout do
inventory.backfill_etags_and_list_missing
end
verified = Upload.pluck(:verified)
expect(Upload.where(verified: true).count).to eq(3)
expect(Upload.where(verified: false).count).to eq(2)
expect(Upload.where(verified: nil).count).to eq(7)
verification_status = Upload.pluck(:verification_status)
expect(Upload.where(verification_status: Upload.verification_statuses[:verified]).count).to eq(3)
expect(Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count).to eq(2)
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(7)
end
it "does not affect the updated_at date of uploads" do