From 72f139191e9b3aeebeca31ffa8b75977afe5d816 Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Wed, 20 May 2020 10:40:38 +1000 Subject: [PATCH] FIX: S3 store has_been_uploaded? was not taking into account s3 bucket path (#9810) In some cases, between Discourse forums the hostname of a URL could match if they are hosting S3 files on the same bucket but the S3 bucket path might not. So e.g. https://testbucket.somesite.com/testpath/some/file/url.png vs https://testbucket.somesite.com/prodpath/some/file/url.png. So has_been_uploaded? was returning true for the second URL, even though it may have been uploaded on a different Discourse forum. This is a very rare case but must be accounted for, because this impacts UrlHelper.is_local which mistakenly thinks the file has already been downloaded and thus allows the URL to be cooked, where we want to return the full URL to be downloaded using PullHotlinkedImages. --- lib/file_store/s3_store.rb | 22 ++++++++++- spec/components/file_store/s3_store_spec.rb | 8 +++- spec/multisite/s3_store_spec.rb | 43 +++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/lib/file_store/s3_store.rb b/lib/file_store/s3_store.rb index 6a26ebd7ee6..46dece4cfcd 100644 --- a/lib/file_store/s3_store.rb +++ b/lib/file_store/s3_store.rb @@ -79,12 +79,30 @@ module FileStore def has_been_uploaded?(url) return false if url.blank? + parsed_url = URI.parse(url) base_hostname = URI.parse(absolute_base_url).hostname - return true if url[base_hostname] + if url[base_hostname] + # if the hostnames match it means the upload is in the same + # bucket on s3. however, the bucket folder path may differ in + # some cases, and we do not want to assume the url is uploaded + # here. e.g. the path of the current site could be /prod and the + # other site could be /staging + if s3_bucket_folder_path.present? + return parsed_url.path.starts_with?("/#{s3_bucket_folder_path}") + else + return true + end + return false + end return false if SiteSetting.Upload.s3_cdn_url.blank? cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname - cdn_hostname.presence && url[cdn_hostname] + return true if cdn_hostname.presence && url[cdn_hostname] + false + end + + def s3_bucket_folder_path + @s3_helper.s3_bucket_folder_path end def s3_bucket_name diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb index 99f57c71047..81e39d0e20b 100644 --- a/spec/components/file_store/s3_store_spec.rb +++ b/spec/components/file_store/s3_store_spec.rb @@ -251,7 +251,7 @@ describe FileStore::S3Store do before do optimized_image.update!( - url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com#{image_path}" + url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{image_path}" ) end @@ -272,6 +272,12 @@ describe FileStore::S3Store do SiteSetting.s3_upload_bucket = "s3-upload-bucket/discourse-uploads" end + before do + optimized_image.update!( + url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/#{image_path}" + ) + end + it "removes the file from s3 with the right paths" do s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once s3_object = stub diff --git a/spec/multisite/s3_store_spec.rb b/spec/multisite/s3_store_spec.rb index 922c5f433ba..064c699a12b 100644 --- a/spec/multisite/s3_store_spec.rb +++ b/spec/multisite/s3_store_spec.rb @@ -217,4 +217,47 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do end end end + + describe "#has_been_uploaded?" do + before do + SiteSetting.s3_region = 'us-west-1' + SiteSetting.s3_upload_bucket = "s3-upload-bucket/test" + SiteSetting.s3_access_key_id = "s3-access-key-id" + SiteSetting.s3_secret_access_key = "s3-secret-access-key" + SiteSetting.enable_s3_uploads = true + end + + let(:store) { FileStore::S3Store.new } + let(:client) { Aws::S3::Client.new(stub_responses: true) } + let(:resource) { Aws::S3::Resource.new(client: client) } + let(:s3_bucket) { resource.bucket(SiteSetting.s3_upload_bucket) } + let(:s3_helper) { store.s3_helper } + + it "returns false for blank urls" do + url = "" + expect(store.has_been_uploaded?(url)).to eq(false) + end + + it "returns true if the base hostname is the same for both urls" do + url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png" + expect(store.has_been_uploaded?(url)).to eq(true) + end + + it "returns false if the base hostname is the same for both urls BUT the bucket name is different in the path" do + bucket = "someotherbucket" + url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{bucket}/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png" + expect(store.has_been_uploaded?(url)).to eq(false) + end + + it "returns false if the hostnames do not match and the s3_cdn_url is blank" do + url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png" + expect(store.has_been_uploaded?(url)).to eq(false) + end + + it "returns true if the s3_cdn_url is present and matches the url hostname" do + SiteSetting.s3_cdn_url = "https://www.someotherhostname.com" + url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png" + expect(store.has_been_uploaded?(url)).to eq(true) + end + end end