FIX: S3 store has_been_uploaded? was not taking into account s3 bucket path (#9810)

In some cases, between Discourse forums the hostname of a URL could match if they are hosting S3 files on the same bucket but the S3 bucket path might not. So e.g. https://testbucket.somesite.com/testpath/some/file/url.png vs https://testbucket.somesite.com/prodpath/some/file/url.png. So has_been_uploaded? was returning true for the second URL, even though it may have been uploaded on a different Discourse forum.

This is a very rare case but must be accounted for, because this impacts UrlHelper.is_local which mistakenly thinks the file has already been downloaded and thus allows the URL to be cooked, where we want to return the full URL to be downloaded using PullHotlinkedImages.
This commit is contained in:
Martin Brennan 2020-05-20 10:40:38 +10:00 committed by GitHub
parent 0a700d81fc
commit 72f139191e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 3 deletions

View File

@ -79,12 +79,30 @@ module FileStore
def has_been_uploaded?(url) def has_been_uploaded?(url)
return false if url.blank? return false if url.blank?
parsed_url = URI.parse(url)
base_hostname = URI.parse(absolute_base_url).hostname base_hostname = URI.parse(absolute_base_url).hostname
return true if url[base_hostname] if url[base_hostname]
# if the hostnames match it means the upload is in the same
# bucket on s3. however, the bucket folder path may differ in
# some cases, and we do not want to assume the url is uploaded
# here. e.g. the path of the current site could be /prod and the
# other site could be /staging
if s3_bucket_folder_path.present?
return parsed_url.path.starts_with?("/#{s3_bucket_folder_path}")
else
return true
end
return false
end
return false if SiteSetting.Upload.s3_cdn_url.blank? return false if SiteSetting.Upload.s3_cdn_url.blank?
cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname
cdn_hostname.presence && url[cdn_hostname] return true if cdn_hostname.presence && url[cdn_hostname]
false
end
def s3_bucket_folder_path
@s3_helper.s3_bucket_folder_path
end end
def s3_bucket_name def s3_bucket_name

View File

@ -251,7 +251,7 @@ describe FileStore::S3Store do
before do before do
optimized_image.update!( optimized_image.update!(
url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com#{image_path}" url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{image_path}"
) )
end end
@ -272,6 +272,12 @@ describe FileStore::S3Store do
SiteSetting.s3_upload_bucket = "s3-upload-bucket/discourse-uploads" SiteSetting.s3_upload_bucket = "s3-upload-bucket/discourse-uploads"
end end
before do
optimized_image.update!(
url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/#{image_path}"
)
end
it "removes the file from s3 with the right paths" do it "removes the file from s3 with the right paths" do
s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once
s3_object = stub s3_object = stub

View File

@ -217,4 +217,47 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do
end end
end end
end end
describe "#has_been_uploaded?" do
before do
SiteSetting.s3_region = 'us-west-1'
SiteSetting.s3_upload_bucket = "s3-upload-bucket/test"
SiteSetting.s3_access_key_id = "s3-access-key-id"
SiteSetting.s3_secret_access_key = "s3-secret-access-key"
SiteSetting.enable_s3_uploads = true
end
let(:store) { FileStore::S3Store.new }
let(:client) { Aws::S3::Client.new(stub_responses: true) }
let(:resource) { Aws::S3::Resource.new(client: client) }
let(:s3_bucket) { resource.bucket(SiteSetting.s3_upload_bucket) }
let(:s3_helper) { store.s3_helper }
it "returns false for blank urls" do
url = ""
expect(store.has_been_uploaded?(url)).to eq(false)
end
it "returns true if the base hostname is the same for both urls" do
url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
expect(store.has_been_uploaded?(url)).to eq(true)
end
it "returns false if the base hostname is the same for both urls BUT the bucket name is different in the path" do
bucket = "someotherbucket"
url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{bucket}/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
expect(store.has_been_uploaded?(url)).to eq(false)
end
it "returns false if the hostnames do not match and the s3_cdn_url is blank" do
url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
expect(store.has_been_uploaded?(url)).to eq(false)
end
it "returns true if the s3_cdn_url is present and matches the url hostname" do
SiteSetting.s3_cdn_url = "https://www.someotherhostname.com"
url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
expect(store.has_been_uploaded?(url)).to eq(true)
end
end
end end