FIX: S3 store has_been_uploaded? was not taking into account s3 bucket path (#9810)
In some cases, between Discourse forums the hostname of a URL could match if they are hosting S3 files on the same bucket but the S3 bucket path might not. So e.g. https://testbucket.somesite.com/testpath/some/file/url.png vs https://testbucket.somesite.com/prodpath/some/file/url.png. So has_been_uploaded? was returning true for the second URL, even though it may have been uploaded on a different Discourse forum. This is a very rare case but must be accounted for, because this impacts UrlHelper.is_local which mistakenly thinks the file has already been downloaded and thus allows the URL to be cooked, where we want to return the full URL to be downloaded using PullHotlinkedImages.
This commit is contained in:
parent
0a700d81fc
commit
72f139191e
|
@ -79,12 +79,30 @@ module FileStore
|
|||
def has_been_uploaded?(url)
|
||||
return false if url.blank?
|
||||
|
||||
parsed_url = URI.parse(url)
|
||||
base_hostname = URI.parse(absolute_base_url).hostname
|
||||
return true if url[base_hostname]
|
||||
if url[base_hostname]
|
||||
# if the hostnames match it means the upload is in the same
|
||||
# bucket on s3. however, the bucket folder path may differ in
|
||||
# some cases, and we do not want to assume the url is uploaded
|
||||
# here. e.g. the path of the current site could be /prod and the
|
||||
# other site could be /staging
|
||||
if s3_bucket_folder_path.present?
|
||||
return parsed_url.path.starts_with?("/#{s3_bucket_folder_path}")
|
||||
else
|
||||
return true
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
return false if SiteSetting.Upload.s3_cdn_url.blank?
|
||||
cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname
|
||||
cdn_hostname.presence && url[cdn_hostname]
|
||||
return true if cdn_hostname.presence && url[cdn_hostname]
|
||||
false
|
||||
end
|
||||
|
||||
def s3_bucket_folder_path
|
||||
@s3_helper.s3_bucket_folder_path
|
||||
end
|
||||
|
||||
def s3_bucket_name
|
||||
|
|
|
@ -251,7 +251,7 @@ describe FileStore::S3Store do
|
|||
|
||||
before do
|
||||
optimized_image.update!(
|
||||
url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com#{image_path}"
|
||||
url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{image_path}"
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -272,6 +272,12 @@ describe FileStore::S3Store do
|
|||
SiteSetting.s3_upload_bucket = "s3-upload-bucket/discourse-uploads"
|
||||
end
|
||||
|
||||
before do
|
||||
optimized_image.update!(
|
||||
url: "//s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/discourse-uploads/#{image_path}"
|
||||
)
|
||||
end
|
||||
|
||||
it "removes the file from s3 with the right paths" do
|
||||
s3_helper.expects(:s3_bucket).returns(s3_bucket).at_least_once
|
||||
s3_object = stub
|
||||
|
|
|
@ -217,4 +217,47 @@ RSpec.describe 'Multisite s3 uploads', type: :multisite do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "#has_been_uploaded?" do
|
||||
before do
|
||||
SiteSetting.s3_region = 'us-west-1'
|
||||
SiteSetting.s3_upload_bucket = "s3-upload-bucket/test"
|
||||
SiteSetting.s3_access_key_id = "s3-access-key-id"
|
||||
SiteSetting.s3_secret_access_key = "s3-secret-access-key"
|
||||
SiteSetting.enable_s3_uploads = true
|
||||
end
|
||||
|
||||
let(:store) { FileStore::S3Store.new }
|
||||
let(:client) { Aws::S3::Client.new(stub_responses: true) }
|
||||
let(:resource) { Aws::S3::Resource.new(client: client) }
|
||||
let(:s3_bucket) { resource.bucket(SiteSetting.s3_upload_bucket) }
|
||||
let(:s3_helper) { store.s3_helper }
|
||||
|
||||
it "returns false for blank urls" do
|
||||
url = ""
|
||||
expect(store.has_been_uploaded?(url)).to eq(false)
|
||||
end
|
||||
|
||||
it "returns true if the base hostname is the same for both urls" do
|
||||
url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
|
||||
expect(store.has_been_uploaded?(url)).to eq(true)
|
||||
end
|
||||
|
||||
it "returns false if the base hostname is the same for both urls BUT the bucket name is different in the path" do
|
||||
bucket = "someotherbucket"
|
||||
url = "https://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/#{bucket}/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
|
||||
expect(store.has_been_uploaded?(url)).to eq(false)
|
||||
end
|
||||
|
||||
it "returns false if the hostnames do not match and the s3_cdn_url is blank" do
|
||||
url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
|
||||
expect(store.has_been_uploaded?(url)).to eq(false)
|
||||
end
|
||||
|
||||
it "returns true if the s3_cdn_url is present and matches the url hostname" do
|
||||
SiteSetting.s3_cdn_url = "https://www.someotherhostname.com"
|
||||
url = "https://www.someotherhostname.com/test/original/2X/d/dd7964f5fd13e1103c5244ca30abe1936c0a4b88.png"
|
||||
expect(store.has_been_uploaded?(url)).to eq(true)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue