FIX: `Post#each_upload_url` yielding external URLs (#27149)

This commit updates `Post#each_upload_url` to reject URLs that do not
have a host which matches `Discourse.current_hostname` but follows the
`/uploads/short-url` uploads URL format. This situation most commonly
happen when users copy upload URL link between different Discourse
sites.
This commit is contained in:
Alan Guo Xiang Tan 2024-05-23 13:15:16 +08:00 committed by GitHub
parent 13848594d2
commit f84eda7c8d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 34 additions and 1 deletions

View File

@ -1142,6 +1142,7 @@ class Post < ActiveRecord::Base
def each_upload_url(fragments: nil, include_local_upload: true)
current_db = RailsMultisite::ConnectionManagement.current_db
upload_patterns = [
%r{/uploads/#{current_db}/},
%r{/original/},
@ -1150,6 +1151,7 @@ class Post < ActiveRecord::Base
]
fragments ||= Nokogiri::HTML5.fragment(self.cooked)
selectors =
fragments.css(
"a/@href",
@ -1183,7 +1185,17 @@ class Post < ActiveRecord::Base
sha1 = Upload.sha1_from_short_url(src)
yield(src, nil, sha1)
next
elsif src.include?("/uploads/short-url/")
end
if src.include?("/uploads/short-url/")
host =
begin
URI(src).host
rescue URI::Error
end
next if host.present? && host != Discourse.current_hostname
sha1 = Upload.sha1_from_short_path(src)
yield(src, nil, sha1)
next
@ -1193,6 +1205,7 @@ class Post < ActiveRecord::Base
next if Rails.configuration.multisite && src.exclude?(current_db)
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
if !Discourse.store.has_been_uploaded?(src) && !Upload.secure_uploads_url?(src) &&
!(include_local_upload && src =~ %r{\A/[^/]}i)
next
@ -1226,6 +1239,7 @@ class Post < ActiveRecord::Base
DistributedMutex.synchronize("find_missing_uploads", validity: 30.minutes) do
PostCustomField.where(name: Post::MISSING_UPLOADS).delete_all
query =
Post
.have_uploads

View File

@ -2087,6 +2087,25 @@ RSpec.describe Post do
expect(urls).to be_empty
end
it "should skip external URLs following the `/uploads/short-url` pattern if a host is present and the host is not the configured host" do
upload = Fabricate(:upload)
raw = <<~RAW
[Upload link with Discourse.base_url](#{Discourse.base_url}/uploads/short-url/#{upload.sha1}.#{upload.extension})
[Upload link without Discourse.base_url](https://some.other.host/uploads/short-url/#{upload.sha1}.#{upload.extension})
[Upload link without host](/uploads/short-url/#{upload.sha1}.#{upload.extension})
RAW
post = Fabricate(:post, raw: raw)
urls = []
post.each_upload_url { |src, _, _| urls << src }
expect(urls).to contain_exactly(
"#{Discourse.base_url}/uploads/short-url/#{upload.sha1}.#{upload.extension}",
"/uploads/short-url/#{upload.sha1}.#{upload.extension}",
)
end
it "skip S3 cdn urls with different path" do
setup_s3
SiteSetting.Upload.stubs(:s3_cdn_url).returns("https://cdn.example.com/site1")