# frozen_string_literal: true

require "s3_helper"
require "s3_inventory"
require "file_store/s3_store"
# Specs for S3Inventory: listing inventory files from S3, backfilling upload
# etags from the inventory CSV, and reporting uploads missing from the bucket.
RSpec.describe "S3Inventory" do
  let(:client) { Aws::S3::Client.new(stub_responses: true) }
  let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) }
  let(:inventory) { S3Inventory.new(helper, :upload) }
  # Fixture CSV standing in for a real S3 inventory manifest.
  let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }

  before do
    setup_s3
    SiteSetting.enable_s3_inventory = true

    # Stub the S3 list_objects call; the lambda also asserts the inventory
    # prefix the code under test is expected to query.
    client.stub_responses(
      :list_objects,
      ->(context) do
        expect(context.params[:prefix]).to eq(
          "#{S3Inventory::INVENTORY_PREFIX}/#{S3Inventory::INVENTORY_VERSION}/bucket/original/hive",
        )

        {
          contents: [
            {
              etag: "\"70ee1738b6b21e2c8a43f3a5ab0eee71\"",
              key: "example1.csv.gz",
              last_modified: Time.parse("2014-11-21T19:40:05.000Z"),
              owner: {
                display_name: "myname",
                id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
              },
              size: 11,
              storage_class: "STANDARD",
            },
            {
              etag: "\"9c8af9a76df052144598c115ef33e511\"",
              key: "example2.csv.gz",
              last_modified: Time.parse("2013-11-15T01:10:49.000Z"),
              owner: {
                display_name: "myname",
                id: "12345example25102679df27bb0ae12b3f85be6f290b936c4393484be31bebcc",
              },
              size: 713_193,
              storage_class: "STANDARD",
            },
          ],
          next_marker: "eyJNYXJrZXIiOiBudWxsLCAiYm90b190cnVuY2F0ZV9hbW91bnQiOiAyfQ==",
        }
      end,
    )

    # Don't let the inventory delete its working files during specs.
    inventory.stubs(:cleanup!)
  end

  it "should raise error if an inventory file is not found" do
    client.stub_responses(:list_objects, contents: [])
    output = capture_stdout { inventory.backfill_etags_and_list_missing }
    expect(output).to eq("Failed to list inventory from S3\n")
  end

  describe "verifying uploads" do
    before do
      freeze_time

      # Seed Upload records matching the fixture CSV rows so the inventory
      # comparison has known hits; rows not containing "default" are skipped.
      CSV.foreach(csv_filename, headers: false) do |row|
        next unless row[S3Inventory::CSV_KEY_INDEX].include?("default")
        Fabricate(
          :upload,
          etag: row[S3Inventory::CSV_ETAG_INDEX],
          url: File.join(Discourse.store.absolute_base_url, row[S3Inventory::CSV_KEY_INDEX]),
          updated_at: 2.days.ago,
        )
      end

      # Uploads that are NOT present in the inventory CSV.
      @upload1 = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
      @upload2 = Fabricate(:upload, etag: "ETag2", updated_at: Time.now)
      @no_etag = Fabricate(:upload, updated_at: 2.days.ago)

      inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)
      inventory.expects(:inventory_date).times(2).returns(Time.now)
    end

    it "should display missing uploads correctly" do
      output = capture_stdout { inventory.backfill_etags_and_list_missing }

      # @upload2 is too new to be flagged; only the two older missing uploads show.
      expect(output).to eq("#{@upload1.url}\n#{@no_etag.url}\n2 of 5 uploads are missing\n")
      expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
    end

    it "should detect when a url match exists with a different etag" do
      differing_etag = Upload.find_by(etag: "defcaac0b4aca535c284e95f30d608d0")
      differing_etag.update_columns(etag: "somethingelse")

      output = capture_stdout { inventory.backfill_etags_and_list_missing }

      expect(output).to eq(<<~TEXT)
        #{differing_etag.url} has different etag
        #{@upload1.url}
        #{@no_etag.url}
        3 of 5 uploads are missing
        1 of these are caused by differing etags
        Null the etag column and re-run for automatic backfill
      TEXT
      expect(Discourse.stats.get("missing_s3_uploads")).to eq(3)
    end

    it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
      expect(
        Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
      ).to eq(12)
      # capture_stdout only suppresses the report; the output itself is not asserted here.
      capture_stdout { inventory.backfill_etags_and_list_missing }

      expect(
        Upload.where(verification_status: Upload.verification_statuses[:verified]).count,
      ).to eq(3)
      expect(
        Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count,
      ).to eq(2)
      expect(
        Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count,
      ).to eq(7)
    end

    it "does not affect the updated_at date of uploads" do
      upload_1_updated = @upload1.updated_at
      upload_2_updated = @upload2.updated_at
      no_etag_updated = @no_etag.updated_at

      output = capture_stdout { inventory.backfill_etags_and_list_missing }

      expect(@upload1.reload.updated_at).to eq_time(upload_1_updated)
      expect(@upload2.reload.updated_at).to eq_time(upload_2_updated)
      expect(@no_etag.reload.updated_at).to eq_time(no_etag_updated)
    end
  end

  it "should backfill etags to uploads table correctly" do
    # [url, expected-etag] pairs matching rows in the fixture CSV.
    files = [
      [
        "#{Discourse.store.absolute_base_url}/uploads/default/original/1X/0184537a4f419224404d013414e913a4f56018f2.jpg",
        "defcaac0b4aca535c284e95f30d608d0",
      ],
      [
        "#{Discourse.store.absolute_base_url}/uploads/default/original/1X/0789fbf5490babc68326b9cec90eeb0d6590db05.png",
        "25c02eaceef4cb779fc17030d33f7f06",
      ],
    ]
    files.each { |file| Fabricate(:upload, url: file[0]) }

    inventory.expects(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]).times(3)

    output =
      capture_stdout do
        expect { inventory.backfill_etags_and_list_missing }.to change {
          Upload.where(etag: nil).count
        }.by(-2)
      end

    expect(Upload.by_users.order(:url).pluck(:url, :etag)).to eq(files)
  end

  it "should work when passed preloaded data" do
    freeze_time

    CSV.foreach(csv_filename, headers: false) do |row|
      next unless row[S3Inventory::CSV_KEY_INDEX].include?("default")
      Fabricate(:upload, etag: row[S3Inventory::CSV_ETAG_INDEX], updated_at: 2.days.ago)
    end

    upload = Fabricate(:upload, etag: "ETag", updated_at: 1.days.ago)
    Fabricate(:upload, etag: "ETag2", updated_at: Time.now)
    no_etag = Fabricate(:upload, updated_at: 2.days.ago)

    output =
      capture_stdout do
        # Hand the inventory a pre-downloaded file instead of letting it hit S3.
        File.open(csv_filename) do |f|
          preloaded_inventory =
            S3Inventory.new(
              helper,
              :upload,
              preloaded_inventory_file: f,
              preloaded_inventory_date: Time.now,
            )
          preloaded_inventory.backfill_etags_and_list_missing
        end
      end

    expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n")
    expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
  end

  describe "s3 inventory configuration" do
    let(:bucket_name) { "s3-upload-bucket" }
    let(:subfolder_path) { "subfolder" }
    before { SiteSetting.s3_upload_bucket = "#{bucket_name}/#{subfolder_path}" }

    it "is formatted correctly for subfolders" do
      s3_helper = S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client)
      config = S3Inventory.new(s3_helper, :upload).send(:inventory_configuration)

      # Destination bucket ARN must not include the subfolder; the subfolder
      # belongs in the prefix instead.
      expect(config[:destination][:s3_bucket_destination][:bucket]).to eq(
        "arn:aws:s3:::#{bucket_name}",
      )
      expect(config[:destination][:s3_bucket_destination][:prefix]).to eq(
        "#{subfolder_path}/inventory/1",
      )
      expect(config[:id]).to eq("#{subfolder_path}-original")
      expect(config[:schedule][:frequency]).to eq("Daily")
      expect(config[:included_object_versions]).to eq("Current")
      expect(config[:optional_fields]).to eq(["ETag"])
      expect(config[:filter][:prefix]).to eq(subfolder_path)
    end
  end
end