DEV: Add verified to uploads and fill in S3 inventory (#10406)
When we run the S3 inventory, mark uploads that exist as verified true, those that don't as verified false, and uploads not included in the check / not yet checked as verified nil.
This commit is contained in:
parent
cfa37d1670
commit
b950b3fb3f
|
@ -0,0 +1,7 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class AddVerifiedColumnToUploads < ActiveRecord::Migration[6.0]
|
||||
def change
|
||||
add_column :uploads, :verified, :boolean, null: true
|
||||
end
|
||||
end
|
|
@ -67,6 +67,23 @@ class S3Inventory
|
|||
.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag")
|
||||
.where("#{table_name}.etag IS NULL")
|
||||
|
||||
# marking as verified/not verified
|
||||
id_threshold_clause = model == Upload ? " AND model_table.id > #{model::SEEDED_ID_THRESHOLD}" : ""
|
||||
DB.exec(<<~SQL, inventory_date
|
||||
UPDATE #{model.table_name}
|
||||
SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END
|
||||
FROM #{model.table_name} AS model_table
|
||||
LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag
|
||||
WHERE model_table.id = #{model.table_name}.id
|
||||
AND model_table.updated_at < ?
|
||||
AND (
|
||||
model_table.verified IS NULL OR
|
||||
model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END
|
||||
)
|
||||
#{id_threshold_clause}
|
||||
SQL
|
||||
)
|
||||
|
||||
if (missing_count = missing_uploads.count) > 0
|
||||
missing_uploads.select(:id, :url).find_each do |upload|
|
||||
log upload.url
|
||||
|
|
|
@ -58,27 +58,57 @@ describe "S3Inventory" do
|
|||
expect(output).to eq("Failed to list inventory from S3\n")
|
||||
end
|
||||
|
||||
it "should display missing uploads correctly" do
|
||||
freeze_time
|
||||
describe "verifying uploads" do
|
||||
before do
|
||||
freeze_time
|
||||
|
||||
CSV.foreach(csv_filename, headers: false) do |row|
|
||||
next unless row[S3Inventory::CSV_KEY_INDEX].include?("default")
|
||||
Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
|
||||
CSV.foreach(csv_filename, headers: false) do |row|
|
||||
next unless row[S3Inventory::CSV_KEY_INDEX].include?("default")
|
||||
Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
|
||||
end
|
||||
|
||||
@upload1 = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
|
||||
@upload2 = Fabricate(:upload, etag: "ETag2", updated_at: Time.now)
|
||||
@no_etag = Fabricate(:upload, updated_at: 2.days.ago)
|
||||
|
||||
inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)
|
||||
inventory.expects(:inventory_date).times(2).returns(Time.now)
|
||||
end
|
||||
|
||||
upload = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
|
||||
Fabricate(:upload, etag: "ETag2", updated_at: Time.now)
|
||||
no_etag = Fabricate(:upload, updated_at: 2.days.ago)
|
||||
it "should display missing uploads correctly" do
|
||||
output = capture_stdout do
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
|
||||
inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)
|
||||
inventory.expects(:inventory_date).returns(Time.now)
|
||||
|
||||
output = capture_stdout do
|
||||
inventory.backfill_etags_and_list_missing
|
||||
expect(output).to eq("#{@upload1.url}\n#{@no_etag.url}\n2 of 5 uploads are missing\n")
|
||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
|
||||
end
|
||||
|
||||
expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n")
|
||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
|
||||
it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
|
||||
expect(Upload.where(verified: nil).count).to eq(12)
|
||||
output = capture_stdout do
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
|
||||
verified = Upload.pluck(:verified)
|
||||
expect(Upload.where(verified: true).count).to eq(3)
|
||||
expect(Upload.where(verified: false).count).to eq(2)
|
||||
expect(Upload.where(verified: nil).count).to eq(7)
|
||||
end
|
||||
|
||||
it "does not affect the updated_at date of uploads" do
|
||||
upload_1_updated = @upload1.updated_at
|
||||
upload_2_updated = @upload2.updated_at
|
||||
no_etag_updated = @no_etag.updated_at
|
||||
|
||||
output = capture_stdout do
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
|
||||
expect(@upload1.reload.updated_at).to eq_time(upload_1_updated)
|
||||
expect(@upload2.reload.updated_at).to eq_time(upload_2_updated)
|
||||
expect(@no_etag.reload.updated_at).to eq_time(no_etag_updated)
|
||||
end
|
||||
end
|
||||
|
||||
it "should backfill etags to uploads table correctly" do
|
||||
|
|
Loading…
Reference in New Issue