From f84eda7c8d68075b5a7b8a23c4472cdd9a645b77 Mon Sep 17 00:00:00 2001 From: Alan Guo Xiang Tan Date: Thu, 23 May 2024 13:15:16 +0800 Subject: [PATCH] FIX: `Post#each_upload_url` yielding external URLs (#27149) This commit updates `Post#each_upload_url` to reject URLs that do not have a host which matches `Discourse.current_hostname` but follows the `/uploads/short-url` uploads URL format. This situation most commonly happen when users copy upload URL link between different Discourse sites. --- app/models/post.rb | 16 +++++++++++++++- spec/models/post_spec.rb | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/app/models/post.rb b/app/models/post.rb index 52ee157a6df..51f9f1ad1a7 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1142,6 +1142,7 @@ class Post < ActiveRecord::Base def each_upload_url(fragments: nil, include_local_upload: true) current_db = RailsMultisite::ConnectionManagement.current_db + upload_patterns = [ %r{/uploads/#{current_db}/}, %r{/original/}, @@ -1150,6 +1151,7 @@ class Post < ActiveRecord::Base ] fragments ||= Nokogiri::HTML5.fragment(self.cooked) + selectors = fragments.css( "a/@href", @@ -1183,7 +1185,17 @@ class Post < ActiveRecord::Base sha1 = Upload.sha1_from_short_url(src) yield(src, nil, sha1) next - elsif src.include?("/uploads/short-url/") + end + + if src.include?("/uploads/short-url/") + host = + begin + URI(src).host + rescue URI::Error + end + + next if host.present? && host != Discourse.current_hostname + sha1 = Upload.sha1_from_short_path(src) yield(src, nil, sha1) next @@ -1193,6 +1205,7 @@ class Post < ActiveRecord::Base next if Rails.configuration.multisite && src.exclude?(current_db) src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//") + if !Discourse.store.has_been_uploaded?(src) && !Upload.secure_uploads_url?(src) && !(include_local_upload && src =~ %r{\A/[^/]}i) next @@ -1226,6 +1239,7 @@ class Post < ActiveRecord::Base DistributedMutex.synchronize("find_missing_uploads", validity: 30.minutes) do PostCustomField.where(name: Post::MISSING_UPLOADS).delete_all + query = Post .have_uploads diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index bd82b76d990..2ca17c1e4ec 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -2087,6 +2087,25 @@ RSpec.describe Post do expect(urls).to be_empty end + it "should skip external URLs following the `/uploads/short-url` pattern if a host is present and the host is not the configured host" do + upload = Fabricate(:upload) + + raw = <<~RAW + [Upload link with Discourse.base_url](#{Discourse.base_url}/uploads/short-url/#{upload.sha1}.#{upload.extension}) + [Upload link without Discourse.base_url](https://some.other.host/uploads/short-url/#{upload.sha1}.#{upload.extension}) + [Upload link without host](/uploads/short-url/#{upload.sha1}.#{upload.extension}) + RAW + + post = Fabricate(:post, raw: raw) + urls = [] + post.each_upload_url { |src, _, _| urls << src } + + expect(urls).to contain_exactly( + "#{Discourse.base_url}/uploads/short-url/#{upload.sha1}.#{upload.extension}", + "/uploads/short-url/#{upload.sha1}.#{upload.extension}", + ) + end + it "skip S3 cdn urls with different path" do setup_s3 SiteSetting.Upload.stubs(:s3_cdn_url).returns("https://cdn.example.com/site1")