diff --git a/app/jobs/regular/update_s3_inventory.rb b/app/jobs/regular/update_s3_inventory.rb deleted file mode 100644 index 699db498860..00000000000 --- a/app/jobs/regular/update_s3_inventory.rb +++ /dev/null @@ -1,21 +0,0 @@ -# frozen_string_literal: true - -require "s3_inventory" - -module Jobs - # if upload bucket changes or inventory bucket changes we want to update s3 bucket policy and inventory configuration - class UpdateS3Inventory < ::Jobs::Base - def execute(args) - unless SiteSetting.enable_s3_inventory? && SiteSetting.Upload.enable_s3_uploads && - SiteSetting.s3_configure_inventory_policy - return - end - - %i[upload optimized].each do |type| - s3_inventory = S3Inventory.new(Discourse.store.s3_helper, type) - s3_inventory.update_bucket_policy if type == :upload - s3_inventory.update_bucket_inventory_configuration - end - end - end -end diff --git a/app/jobs/scheduled/ensure_s3_uploads_existence.rb b/app/jobs/scheduled/ensure_s3_uploads_existence.rb index 29338784433..c7a80e16a73 100644 --- a/app/jobs/scheduled/ensure_s3_uploads_existence.rb +++ b/app/jobs/scheduled/ensure_s3_uploads_existence.rb @@ -15,10 +15,6 @@ module Jobs end end - def s3_helper - Discourse.store.s3_helper - end - def prepare_for_all_sites inventory = S3Inventory.new(s3_helper, :upload) @db_inventories = inventory.prepare_for_all_sites @@ -26,8 +22,7 @@ module Jobs end def execute(args) - return if !SiteSetting.enable_s3_inventory - require "s3_inventory" + return if (s3_inventory_bucket = SiteSetting.s3_inventory_bucket).blank? if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3? prepare_for_all_sites @@ -37,13 +32,13 @@ module Jobs preloaded_inventory_file = @db_inventories[RailsMultisite::ConnectionManagement.current_db] S3Inventory.new( - s3_helper, :upload, + s3_inventory_bucket:, preloaded_inventory_file: preloaded_inventory_file, preloaded_inventory_date: @inventory_date, ).backfill_etags_and_list_missing else - S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing + S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing end end end diff --git a/config/initializers/014-track-setting-changes.rb b/config/initializers/014-track-setting-changes.rb index d6b3d750338..99347537d60 100644 --- a/config/initializers/014-track-setting-changes.rb +++ b/config/initializers/014-track-setting-changes.rb @@ -53,8 +53,6 @@ DiscourseEvent.on(:site_setting_changed) do |name, old_value, new_value| Scheduler::Defer.later("Null topic slug") { Topic.update_all(slug: nil) } end - Jobs.enqueue(:update_s3_inventory) if %i[enable_s3_inventory s3_upload_bucket].include?(name) - SvgSprite.expire_cache if name.to_s.include?("_icon") SiteIconManager.ensure_optimized! if SiteIconManager::WATCHED_SETTINGS.include?(name) diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 72b9a2d003d..7a17ca17631 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -1973,7 +1973,6 @@ en: s3_endpoint: "The endpoint can be modified to backup to an S3 compatible service like DigitalOcean Spaces or Minio. WARNING: Leave blank if using AWS S3." s3_configure_tombstone_policy: "Enable automatic deletion policy for tombstone uploads. IMPORTANT: If disabled, no space will be reclaimed after uploads are deleted." s3_disable_cleanup: "Prevent removal of old backups from S3 when there are more backups than the maximum allowed." - enable_s3_inventory: "Generate reports and verify uploads using Amazon S3 inventory. IMPORTANT: requires valid S3 credentials (both access key id & secret access key)." s3_use_acls: "AWS recommends not using ACLs on S3 buckets; if you are following this advice, uncheck this option. This must be enabled if you are using secure uploads." backup_time_of_day: "Time of day UTC when the backup should occur." backup_with_uploads: "Include uploads in scheduled backups. Disabling this will only backup the database." diff --git a/config/site_settings.yml b/config/site_settings.yml index 20c72785120..c540d295d78 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1581,11 +1581,6 @@ files: default: true s3_use_acls: default: true - enable_s3_inventory: - default: false - s3_configure_inventory_policy: - default: true - hidden: true s3_presigned_get_url_expires_after_seconds: default: 300 hidden: true @@ -2487,6 +2482,10 @@ backups: s3_backup_bucket: default: "" regex: '^[a-z0-9\-\/]+$' # can't use '.' when using HTTPS + s3_inventory_bucket: + hidden: true + default: "" + regex: '^[a-z0-9\-\/_]+$' # can't use '.' when using HTTPS s3_disable_cleanup: default: false backup_time_of_day: diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index 55f6668f156..e363a1dea33 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -297,10 +297,12 @@ module FileStore end def list_missing_uploads(skip_optimized: false) - if SiteSetting.enable_s3_inventory - require "s3_inventory" - S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing - S3Inventory.new(s3_helper, :optimized).backfill_etags_and_list_missing unless skip_optimized + if s3_inventory_bucket = SiteSetting.s3_inventory_bucket + S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing + + unless skip_optimized + S3Inventory.new(:optimized, s3_inventory_bucket:).backfill_etags_and_list_missing + end else list_missing(Upload.by_users, "original/") list_missing(OptimizedImage, "optimized/") unless skip_optimized diff --git a/lib/s3_helper.rb b/lib/s3_helper.rb index e8d5a05e9e0..60b832f5aff 100644 --- a/lib/s3_helper.rb +++ b/lib/s3_helper.rb @@ -282,7 +282,12 @@ class S3Helper end def s3_client - @s3_client ||= Aws::S3::Client.new(@s3_options) + @s3_client ||= init_aws_s3_client + end + + def stub_client_responses! + raise "This method is only allowed to be used in the testing environment" if !Rails.env.test? + @s3_client = init_aws_s3_client(stub_responses: true) end def s3_inventory_path(path = "inventory") @@ -381,6 +386,12 @@ class S3Helper private + def init_aws_s3_client(stub_responses: false) + options = @s3_options + options = options.merge(stub_responses: true) if stub_responses + Aws::S3::Client.new(options) + end + def fetch_bucket_cors_rules begin s3_resource.client.get_bucket_cors(bucket: @s3_bucket_name).cors_rules&.map(&:to_h) || [] diff --git a/lib/s3_inventory.rb b/lib/s3_inventory.rb index 73fb7ab46a4..eec2b1c4fe3 100644 --- a/lib/s3_inventory.rb +++ b/lib/s3_inventory.rb @@ -4,17 +4,21 @@ require "aws-sdk-s3" require "csv" class S3Inventory - attr_reader :type, :inventory_date + attr_reader :type, :inventory_date, :s3_helper CSV_KEY_INDEX = 1 CSV_ETAG_INDEX = 2 INVENTORY_PREFIX = "inventory" - INVENTORY_VERSION = "1" INVENTORY_LAG = 2.days WAIT_AFTER_RESTORE_DAYS = 2 - def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil) - @s3_helper = s3_helper + def initialize( + type, + s3_inventory_bucket:, + preloaded_inventory_file: nil, + preloaded_inventory_date: nil + ) + @s3_helper = S3Helper.new(s3_inventory_bucket) if preloaded_inventory_file && preloaded_inventory_date # Data preloaded, so we don't need to fetch it again @@ -189,43 +193,6 @@ class S3Inventory ) end - def update_bucket_policy - @s3_helper.s3_client.put_bucket_policy( - bucket: bucket_name, - policy: { - Version: "2012-10-17", - Statement: [ - { - Sid: "InventoryAndAnalyticsPolicy", - Effect: "Allow", - Principal: { - Service: "s3.amazonaws.com", - }, - Action: ["s3:PutObject"], - Resource: ["#{inventory_path_arn}/*"], - Condition: { - ArnLike: { - "aws:SourceArn": bucket_arn, - }, - StringEquals: { - "s3:x-amz-acl": "bucket-owner-full-control", - }, - }, - }, - ], - }.to_json, - ) - end - - def update_bucket_inventory_configuration - @s3_helper.s3_client.put_bucket_inventory_configuration( - bucket: bucket_name, - id: inventory_id, - inventory_configuration: inventory_configuration, - use_accelerate_endpoint: false, - ) - end - def prepare_for_all_sites db_names = RailsMultisite::ConnectionManagement.all_dbs db_files = {} @@ -248,6 +215,10 @@ class S3Inventory cleanup! end + def s3_client + @s3_helper.s3_client + end + private def cleanup! @@ -318,31 +289,6 @@ class S3Inventory end end - def inventory_configuration - filter_prefix = type - filter_prefix = bucket_folder_path if bucket_folder_path.present? - - { - destination: { - s3_bucket_destination: { - bucket: bucket_arn, - prefix: inventory_path, - format: "CSV", - }, - }, - filter: { - prefix: filter_prefix, - }, - is_enabled: SiteSetting.enable_s3_inventory, - id: inventory_id, - included_object_versions: "Current", - optional_fields: ["ETag"], - schedule: { - frequency: "Daily", - }, - } - end - def bucket_name @s3_helper.s3_bucket_name end @@ -353,8 +299,7 @@ class S3Inventory def unsorted_files objects = [] - - hive_path = File.join(inventory_path, bucket_name, inventory_id, "hive") + hive_path = File.join(bucket_folder_path, "hive") @s3_helper.list(hive_path).each { |obj| objects << obj if obj.key.match?(/symlink\.txt\z/i) } objects @@ -363,28 +308,6 @@ class S3Inventory [] end - def inventory_id - @inventory_id ||= - begin - id = Rails.configuration.multisite ? "original" : type # TODO: rename multisite path to "uploads" - bucket_folder_path.present? ? "#{bucket_folder_path}-#{id}" : id - end - end - - def inventory_path_arn - File.join(bucket_arn, inventory_path) - end - - def inventory_path - path = File.join(INVENTORY_PREFIX, INVENTORY_VERSION) - path = File.join(bucket_folder_path, path) if bucket_folder_path.present? - path - end - - def bucket_arn - "arn:aws:s3:::#{bucket_name}" - end - def log(message, ex = nil) puts(message) Rails.logger.error("#{ex}\n" + (ex.backtrace || []).join("\n")) if ex diff --git a/lib/site_settings/validations.rb b/lib/site_settings/validations.rb index 4a25bc9eda4..828537c5439 100644 --- a/lib/site_settings/validations.rb +++ b/lib/site_settings/validations.rb @@ -187,12 +187,6 @@ module SiteSettings::Validations end end - def validate_enable_s3_inventory(new_val) - if new_val == "t" && !SiteSetting.Upload.enable_s3_uploads - validate_error :enable_s3_uploads_is_required - end - end - def validate_backup_location(new_val) return unless new_val == BackupLocationSiteSetting::S3 if SiteSetting.s3_backup_bucket.blank? diff --git a/spec/jobs/ensure_s3_uploads_existence_spec.rb b/spec/jobs/ensure_s3_uploads_existence_spec.rb index e7c8d267030..c4916548515 100644 --- a/spec/jobs/ensure_s3_uploads_existence_spec.rb +++ b/spec/jobs/ensure_s3_uploads_existence_spec.rb @@ -3,11 +3,8 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do subject(:job) { described_class.new } - context "with S3 inventory enabled" do - before do - setup_s3 - SiteSetting.enable_s3_inventory = true - end + context "when `s3_inventory_bucket` has been set" do + before { SiteSetting.s3_inventory_bucket = "some-bucket-name" } it "works" do S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once @@ -15,8 +12,8 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do end end - context "with S3 inventory disabled" do - before { SiteSetting.enable_s3_inventory = false } + context "when `s3_inventory_bucket` has not been set" do + before { SiteSetting.s3_inventory_bucket = nil } it "doesn't execute" do S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never diff --git a/spec/jobs/update_s3_inventory_spec.rb b/spec/jobs/update_s3_inventory_spec.rb deleted file mode 100644 index ab43aa4060b..00000000000 --- a/spec/jobs/update_s3_inventory_spec.rb +++ /dev/null @@ -1,63 +0,0 @@ -# frozen_string_literal: true - -require "file_store/s3_store" - -RSpec.describe Jobs::UpdateS3Inventory do - before do - setup_s3 - SiteSetting.s3_upload_bucket = "special-bucket" - SiteSetting.enable_s3_inventory = true - - store = FileStore::S3Store.new - @client = Aws::S3::Client.new(stub_responses: true) - store.s3_helper.stubs(:s3_client).returns(@client) - Discourse.stubs(:store).returns(store) - end - - it "updates the bucket policy and inventory configuration in S3" do - id = "original" - path = File.join(S3Inventory::INVENTORY_PREFIX, S3Inventory::INVENTORY_VERSION) - - @client.expects(:put_bucket_policy).with( - bucket: "special-bucket", - policy: - %Q|{"Version":"2012-10-17","Statement":[{"Sid":"InventoryAndAnalyticsPolicy","Effect":"Allow","Principal":{"Service":"s3.amazonaws.com"},"Action":["s3:PutObject"],"Resource":["arn:aws:s3:::special-bucket/#{path}/*"],"Condition":{"ArnLike":{"aws:SourceArn":"arn:aws:s3:::special-bucket"},"StringEquals":{"s3:x-amz-acl":"bucket-owner-full-control"}}}]}|, - ) - @client.expects(:put_bucket_inventory_configuration) - @client.expects(:put_bucket_inventory_configuration).with( - bucket: "special-bucket", - id: id, - inventory_configuration: { - destination: { - s3_bucket_destination: { - bucket: "arn:aws:s3:::special-bucket", - prefix: path, - format: "CSV", - }, - }, - filter: { - prefix: id, - }, - is_enabled: true, - id: id, - included_object_versions: "Current", - optional_fields: ["ETag"], - schedule: { - frequency: "Daily", - }, - }, - use_accelerate_endpoint: false, - ) - - described_class.new.execute(nil) - end - - it "doesn't update the policy with s3_configure_inventory_policy disabled" do - SiteSetting.s3_configure_inventory_policy = false - - @client.expects(:put_bucket_policy).never - @client.expects(:put_bucket_inventory_configuration).never - - described_class.new.execute(nil) - end -end diff --git a/spec/lib/s3_inventory_multisite_spec.rb b/spec/lib/s3_inventory_multisite_spec.rb index 19cb36313c8..9fab4407274 100644 --- a/spec/lib/s3_inventory_multisite_spec.rb +++ b/spec/lib/s3_inventory_multisite_spec.rb @@ -5,15 +5,16 @@ require "s3_inventory" require "file_store/s3_store" RSpec.describe "S3Inventory", type: :multisite do - let(:client) { Aws::S3::Client.new(stub_responses: true) } - let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) } - let(:inventory) { S3Inventory.new(helper, :upload) } + let(:inventory) do + S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/some/prefix") + end + let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" } it "can create per-site files" do freeze_time setup_s3 - SiteSetting.enable_s3_inventory = true + inventory.stubs(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }]) inventory.stubs(:cleanup!) @@ -23,6 +24,7 @@ RSpec.describe "S3Inventory", type: :multisite do expect(db1.lines.count).to eq(3) expect(db2.lines.count).to eq(1) + files.values.each do |f| f.close f.unlink diff --git a/spec/lib/s3_inventory_spec.rb b/spec/lib/s3_inventory_spec.rb index c1b07b743eb..6021fb11bdc 100644 --- a/spec/lib/s3_inventory_spec.rb +++ b/spec/lib/s3_inventory_spec.rb @@ -1,25 +1,19 @@ # frozen_string_literal: true -require "s3_helper" -require "s3_inventory" -require "file_store/s3_store" - RSpec.describe S3Inventory do - let(:client) { Aws::S3::Client.new(stub_responses: true) } - let(:resource) { Aws::S3::Resource.new(client: client) } - let(:bucket) { resource.bucket(SiteSetting.Upload.s3_upload_bucket.downcase) } - let(:helper) { S3Helper.new(bucket.name, "", client: client, bucket: bucket) } - let(:inventory) { S3Inventory.new(helper, :upload) } + let(:inventory) do + S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/inventoried-bucket/prefix") + end + let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" } before do - setup_s3 - SiteSetting.enable_s3_inventory = true + inventory.s3_helper.stub_client_responses! inventory.stubs(:cleanup!) end it "should raise error if an inventory file is not found" do - client.stub_responses(:list_objects, contents: []) + inventory.s3_client.stub_responses(:list_objects, contents: []) output = capture_stdout { inventory.backfill_etags_and_list_missing } expect(output).to eq("Failed to list inventory from S3\n") end @@ -141,7 +135,7 @@ RSpec.describe S3Inventory do end it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do - client.stub_responses( + inventory.s3_client.stub_responses( :list_objects_v2, { contents: [ @@ -155,16 +149,13 @@ RSpec.describe S3Inventory do }, ) - client.expects(:get_object).once + inventory.s3_client.expects(:get_object).once - capture_stdout do - inventory = described_class.new(helper, :upload) - inventory.backfill_etags_and_list_missing - end + capture_stdout { inventory.backfill_etags_and_list_missing } end it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do - client.stub_responses( + inventory.s3_client.stub_responses( :list_objects_v2, { contents: [ @@ -177,12 +168,9 @@ RSpec.describe S3Inventory do }, ) - client.expects(:get_object).never + inventory.s3_client.expects(:get_object).never - capture_stdout do - inventory = described_class.new(helper, :upload) - inventory.backfill_etags_and_list_missing - end + capture_stdout { inventory.backfill_etags_and_list_missing } end end @@ -203,11 +191,12 @@ RSpec.describe S3Inventory do File.open(csv_filename) do |f| preloaded_inventory = S3Inventory.new( - helper, :upload, + s3_inventory_bucket: "some-inventory-bucket", preloaded_inventory_file: f, preloaded_inventory_date: Time.now, ) + preloaded_inventory.backfill_etags_and_list_missing end end @@ -215,27 +204,4 @@ RSpec.describe S3Inventory do expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n") expect(Discourse.stats.get("missing_s3_uploads")).to eq(2) end - - describe "s3 inventory configuration" do - let(:bucket_name) { "s3-upload-bucket" } - let(:subfolder_path) { "subfolder" } - before { SiteSetting.s3_upload_bucket = "#{bucket_name}/#{subfolder_path}" } - - it "is formatted correctly for subfolders" do - s3_helper = S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) - config = S3Inventory.new(s3_helper, :upload).send(:inventory_configuration) - - expect(config[:destination][:s3_bucket_destination][:bucket]).to eq( - "arn:aws:s3:::#{bucket_name}", - ) - expect(config[:destination][:s3_bucket_destination][:prefix]).to eq( - "#{subfolder_path}/inventory/1", - ) - expect(config[:id]).to eq("#{subfolder_path}-original") - expect(config[:schedule][:frequency]).to eq("Daily") - expect(config[:included_object_versions]).to eq("Current") - expect(config[:optional_fields]).to eq(["ETag"]) - expect(config[:filter][:prefix]).to eq(subfolder_path) - end - end end