FIX: `S3Inventory` to ignore files older than last backup restore date (#27166)
This commit updates `S3Inventory#files` to ignore S3 inventory files that were last modified less than 2 days after the `BackupMetadata.last_restore_date` timestamp. This check previously lived only in `Jobs::EnsureS3UploadsExistence`, but `S3Inventory` can also be used via Rake tasks, so the protection needs to be in `S3Inventory` itself rather than in the scheduled job.
This commit is contained in:
parent a4c5f85b10
commit df16ab0758
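
To make the new check concrete, here is a minimal standalone sketch of the condition; the helper name and example timestamps are illustrative only, while the real implementation lives in `S3Inventory#files` as shown in the diff below:

```ruby
require "active_support/time"

WAIT_AFTER_RESTORE_DAYS = 2

# An inventory file is only trusted if it was generated at least two days
# after the most recent backup restore (or if the site was never restored).
def inventory_usable?(inventory_last_modified, last_restore_date)
  return true if last_restore_date.nil?
  (inventory_last_modified - WAIT_AFTER_RESTORE_DAYS.days) >= last_restore_date
end

restored_at = Time.utc(2024, 5, 20)
inventory_usable?(Time.utc(2024, 5, 21), restored_at) # => false, too close to the restore
inventory_usable?(Time.utc(2024, 5, 23), restored_at) # => true
```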
@@ -4,8 +4,6 @@ module Jobs
  class EnsureS3UploadsExistence < ::Jobs::Scheduled
    every 1.day

    WAIT_AFTER_RESTORE_HOURS = 48

    def perform(*args)
      super
    ensure
@@ -28,7 +26,7 @@ module Jobs
    end

    def execute(args)
      return if !executable?
      return if !SiteSetting.enable_s3_inventory
      require "s3_inventory"

      if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3?
@@ -48,15 +46,5 @@ module Jobs
        S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
      end
    end

    def executable?
      return false if !SiteSetting.enable_s3_inventory

      if last_restore_date = BackupMetadata.last_restore_date
        return false if last_restore_date > WAIT_AFTER_RESTORE_HOURS.hours.ago
      end

      true
    end
  end
end
@@ -27,6 +27,7 @@ class S3Helper

  def initialize(s3_bucket_name, tombstone_prefix = "", options = {})
    @s3_client = options.delete(:client)
    @s3_bucket = options.delete(:bucket)
    @s3_options = default_s3_options.merge(options)

    @s3_bucket_name, @s3_bucket_folder_path =
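
The new `:bucket` option on `S3Helper#initialize` lets callers hand in a pre-built bucket instead of having the helper construct one; the updated spec further down uses it to inject a stubbed bucket. A rough sketch of that pattern using the AWS SDK's response stubbing; the bucket name is illustrative and `S3Helper` itself only exists inside the Discourse codebase:

```ruby
require "aws-sdk-s3"

# Stubbed client: no credentials or network access needed.
client = Aws::S3::Client.new(stub_responses: true)
resource = Aws::S3::Resource.new(client: client)
bucket = resource.bucket("example-upload-bucket")

# Inside a Discourse spec, the helper can then be built against the stubbed
# bucket directly, as the updated s3_inventory spec does:
#   helper = S3Helper.new(bucket.name, "", client: client, bucket: bucket)
```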
@ -6,11 +6,12 @@ require "csv"
|
|||
class S3Inventory
|
||||
attr_reader :type, :model, :inventory_date
|
||||
|
||||
CSV_KEY_INDEX ||= 1
|
||||
CSV_ETAG_INDEX ||= 2
|
||||
INVENTORY_PREFIX ||= "inventory"
|
||||
INVENTORY_VERSION ||= "1"
|
||||
INVENTORY_LAG ||= 2.days
|
||||
CSV_KEY_INDEX = 1
|
||||
CSV_ETAG_INDEX = 2
|
||||
INVENTORY_PREFIX = "inventory"
|
||||
INVENTORY_VERSION = "1"
|
||||
INVENTORY_LAG = 2.days
|
||||
WAIT_AFTER_RESTORE_DAYS = 2
|
||||
|
||||
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
|
||||
@s3_helper = s3_helper
|
||||
|
@@ -41,11 +42,13 @@ class S3Inventory
        download_and_decompress_files if !@preloaded_inventory_file

        multisite_prefix = Discourse.store.upload_path

        ActiveRecord::Base.transaction do
          begin
            connection.exec(
              "CREATE TEMP TABLE #{table_name}(url text UNIQUE, etag text, PRIMARY KEY(etag, url))",
            )

            connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
              for_each_inventory_row do |row|
                key = row[CSV_KEY_INDEX]
@@ -258,17 +261,28 @@ class S3Inventory

  def files
    return if @preloaded_inventory_file

    @files ||=
      begin
        symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first

        return [] if symlink_file.blank?

        if BackupMetadata.last_restore_date.present? &&
             (symlink_file.last_modified - WAIT_AFTER_RESTORE_DAYS.days) <
               BackupMetadata.last_restore_date
          return []
        end

        @inventory_date = symlink_file.last_modified - INVENTORY_LAG
        log "Downloading symlink file to tmp directory..."
        failure_message = "Failed to download symlink file to tmp directory."
        filename = File.join(tmp_directory, File.basename(symlink_file.key))

        @s3_helper.download_file(symlink_file.key, filename, failure_message)

        return [] if !File.exist?(filename)

        File
          .readlines(filename)
          .map do |key|
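
For context on the two-day figure: the symlink file's `last_modified` time trails the data it describes, which is why `@inventory_date` above is derived by subtracting `INVENTORY_LAG`, and the restore guard uses the same two-day window. A small self-contained illustration of that arithmetic; the helper name is mine, not part of the diff:

```ruby
require "active_support/time"

INVENTORY_LAG = 2.days

# Mirrors the @inventory_date assignment above: the date the inventory data
# effectively describes is two days before the symlink file's mtime.
def effective_inventory_date(symlink_last_modified)
  symlink_last_modified - INVENTORY_LAG
end

effective_inventory_date(Time.utc(2024, 5, 23)) # => 2024-05-21 00:00:00 UTC
```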
@@ -13,20 +13,6 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do
      S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
      job.execute({})
    end

    it "doesn't execute when the site was restored within the last 48 hours" do
      S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never
      BackupMetadata.update_last_restore_date(47.hours.ago)

      job.execute({})
    end

    it "executes when the site was restored more than 48 hours ago" do
      S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
      BackupMetadata.update_last_restore_date(49.hours.ago)

      job.execute({})
    end
  end

  context "with S3 inventory disabled" do
@ -4,53 +4,17 @@ require "s3_helper"
|
|||
require "s3_inventory"
|
||||
require "file_store/s3_store"
|
||||
|
||||
RSpec.describe "S3Inventory" do
|
||||
RSpec.describe S3Inventory do
|
||||
let(:client) { Aws::S3::Client.new(stub_responses: true) }
|
||||
let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) }
|
||||
let(:resource) { Aws::S3::Resource.new(client: client) }
|
||||
let(:bucket) { resource.bucket(SiteSetting.Upload.s3_upload_bucket.downcase) }
|
||||
let(:helper) { S3Helper.new(bucket.name, "", client: client, bucket: bucket) }
|
||||
let(:inventory) { S3Inventory.new(helper, :upload) }
|
||||
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
|
||||
|
||||
before do
|
||||
setup_s3
|
||||
SiteSetting.enable_s3_inventory = true
|
||||
|
||||
client.stub_responses(
|
||||
:list_objects,
|
||||
->(context) do
|
||||
expect(context.params[:prefix]).to eq(
|
||||
"#{S3Inventory::INVENTORY_PREFIX}/#{S3Inventory::INVENTORY_VERSION}/bucket/original/hive",
|
||||
)
|
||||
|
||||
{
|
||||
contents: [
|
||||
{
|
||||
etag: "\"70ee1738b6b21e2c8a43f3a5ab0eee71\"",
|
||||
key: "example1.csv.gz",
|
||||
last_modified: Time.parse("2014-11-21T19:40:05.000Z"),
|
||||
owner: {
|
||||
display_name: "myname",
|
||||
id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
|
||||
},
|
||||
size: 11,
|
||||
storage_class: "STANDARD",
|
||||
},
|
||||
{
|
||||
etag: "\"9c8af9a76df052144598c115ef33e511\"",
|
||||
key: "example2.csv.gz",
|
||||
last_modified: Time.parse("2013-11-15T01:10:49.000Z"),
|
||||
owner: {
|
||||
display_name: "myname",
|
||||
id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
|
||||
},
|
||||
size: 713_193,
|
||||
storage_class: "STANDARD",
|
||||
},
|
||||
],
|
||||
next_marker: "eyJNYXJrZXIiOiBudWxsLCAiYm90b190cnVuY2F0ZV9hbW91bnQiOiAyfQ==",
|
||||
}
|
||||
end,
|
||||
)
|
||||
|
||||
inventory.stubs(:cleanup!)
|
||||
end
|
||||
|
||||
|
@ -162,6 +126,58 @@ RSpec.describe "S3Inventory" do
|
|||
expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
|
||||
end
|
||||
|
||||
context "when site was restored from a backup" do
|
||||
before do
|
||||
freeze_time
|
||||
BackupMetadata.update_last_restore_date(Time.now)
|
||||
end
|
||||
|
||||
it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
|
||||
client.stub_responses(
|
||||
:list_objects_v2,
|
||||
{
|
||||
contents: [
|
||||
{
|
||||
key: "symlink.txt",
|
||||
last_modified:
|
||||
BackupMetadata.last_restore_date + described_class::WAIT_AFTER_RESTORE_DAYS.days,
|
||||
size: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
client.expects(:get_object).once
|
||||
|
||||
capture_stdout do
|
||||
inventory = described_class.new(helper, :upload)
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
end
|
||||
|
||||
it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
|
||||
client.stub_responses(
|
||||
:list_objects_v2,
|
||||
{
|
||||
contents: [
|
||||
{
|
||||
key: "symlink.txt",
|
||||
last_modified: BackupMetadata.last_restore_date + 1.day,
|
||||
size: 1,
|
||||
},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
client.expects(:get_object).never
|
||||
|
||||
capture_stdout do
|
||||
inventory = described_class.new(helper, :upload)
|
||||
inventory.backfill_etags_and_list_missing
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it "should work when passed preloaded data" do
|
||||
freeze_time
|
||||
|
||||
|
|