From 45b37a8bd17f635c4011865a84a1e0897bd4972e Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Fri, 24 Jan 2020 11:59:30 +1000 Subject: [PATCH] FIX: Resolve pull hotlinked image and broken link issues for secure media URLs (#8777) When pull_hotlinked_images tried to run on posts with secure media (which had already been downloaded from external sources) we were getting a 404 when trying to download the image because the secure endpoint doesn't allow anon downloads. Also, we were getting into an infinite loop of pull_hotlinked_images because the job didn't consider the secure media URLs as "downloaded" already so it kept trying to download them over and over. In this PR I have also refactored secure-media-upload URL checks and mutations into single source of truth in Upload, adding a SECURE_MEDIA_ROUTE constant to check URLs against too. --- app/jobs/regular/pull_hotlinked_images.rb | 10 ++++-- app/models/post.rb | 2 +- app/models/upload.rb | 14 ++++++++ lib/cooked_post_processor.rb | 5 ++- lib/email/styles.rb | 4 +-- lib/pretty_text.rb | 2 +- lib/pretty_text/helpers.rb | 6 +--- lib/url_helper.rb | 3 +- spec/jobs/pull_hotlinked_images_spec.rb | 39 +++++++++++++++++++++++ 9 files changed, 69 insertions(+), 16 deletions(-) diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 0b4823b54d3..5cd69b4ca21 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -62,6 +62,11 @@ module Jobs schemeless_src = remove_scheme(original_src) unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src) + + # secure-media-uploads endpoint prevents anonymous downloads, so we + # need the presigned S3 URL here + src = Upload.signed_url_from_secure_media_url(src) if Upload.secure_media_url?(src) + if hotlinked = download(src) if File.size(hotlinked.path) <= @max_size filename = File.basename(URI.parse(src).path) @@ -162,8 +167,9 @@ module Jobs # make sure we actually have a url return false unless src.present? - # If file is on the forum or CDN domain - if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i + # If file is on the forum or CDN domain or already has the + # secure media url + if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i || Upload.secure_media_url?(src) return false if src =~ /\/images\/emoji\// # Someone could hotlink a file from a different site on the same CDN, diff --git a/app/models/post.rb b/app/models/post.rb index 78da6bd0b86..96027645d10 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -307,7 +307,7 @@ class Post < ActiveRecord::Base each_upload_url do |url| uri = URI.parse(url) if FileHelper.is_supported_media?(File.basename(uri.path)) - raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/secure-media-uploads") + raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/#{Upload::SECURE_MEDIA_ROUTE}") end end end diff --git a/app/models/upload.rb b/app/models/upload.rb index 09133cc7f8a..20820ba0164 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -9,6 +9,7 @@ class Upload < ActiveRecord::Base SHA1_LENGTH = 40 SEEDED_ID_THRESHOLD = 0 URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/ + SECURE_MEDIA_ROUTE = "secure-media-uploads".freeze belongs_to :user belongs_to :access_control_post, class_name: 'Post' @@ -120,6 +121,19 @@ class Upload < ActiveRecord::Base self.class.short_path(sha1: self.sha1, extension: self.extension) end + def self.secure_media_url?(url) + url.include?(SECURE_MEDIA_ROUTE) + end + + def self.signed_url_from_secure_media_url(url) + secure_upload_s3_path = url.sub(Discourse.base_url, "").sub("/#{SECURE_MEDIA_ROUTE}/", "") + Discourse.store.signed_url_for_path(secure_upload_s3_path) + end + + def self.secure_media_url_from_upload_url(url) + url.sub(SiteSetting.Upload.absolute_base_url, "/#{SECURE_MEDIA_ROUTE}") + end + def self.short_path(sha1:, extension:) @url_helpers ||= Rails.application.routes.url_helpers diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 17af21d2f78..1bb64e648d0 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -284,9 +284,8 @@ class CookedPostProcessor # we can't direct FastImage to our secure-media-uploads url because it bounces # anonymous requests with a 404 error - if url&.include?("/secure-media-uploads/") - secure_upload_s3_path = absolute_url.sub(Discourse.base_url, "").sub("/secure-media-uploads/", "") - absolute_url = Discourse.store.signed_url_for_path(secure_upload_s3_path) + if url && Upload.secure_media_url?(url) + absolute_url = Upload.signed_url_from_secure_media_url(absolute_url) end return unless is_valid_image_url?(absolute_url) diff --git a/lib/email/styles.rb b/lib/email/styles.rb index 62f98a89c34..6513ba2a168 100644 --- a/lib/email/styles.rb +++ b/lib/email/styles.rb @@ -288,7 +288,7 @@ module Email def replace_secure_media_urls @fragment.css('[href]').each do |a| - if a['href'][/secure-media-uploads/] + if a['href'][/#{Upload::SECURE_MEDIA_ROUTE}/] a.add_next_sibling "

#{I18n.t("emails.secure_media_placeholder")}

" a.remove end @@ -296,7 +296,7 @@ module Email @fragment.search('img').each do |img| next unless img['src'] - if img['src'][/secure-media-uploads/] + if img['src'][/#{Upload::SECURE_MEDIA_ROUTE}/] img.add_next_sibling "

#{I18n.t("emails.secure_media_placeholder")}

" img.remove end diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index b01117733a1..d0cb4a87e34 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -388,7 +388,7 @@ module PrettyText def self.strip_secure_media(doc) doc.css("a[href]").each do |a| - if a["href"].include?("/secure-media-uploads/") && FileHelper.is_supported_media?(a["href"]) + if a["href"].include?("/#{Upload::SECURE_MEDIA_ROUTE}/") && FileHelper.is_supported_media?(a["href"]) target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a target.replace "

#{I18n.t("emails.secure_media_placeholder")}

" end diff --git a/lib/pretty_text/helpers.rb b/lib/pretty_text/helpers.rb index 975c104fba1..0de6000fea9 100644 --- a/lib/pretty_text/helpers.rb +++ b/lib/pretty_text/helpers.rb @@ -72,7 +72,7 @@ module PrettyText short_urls.each do |short_url| result[short_url] = { - url: secure_media ? secure_media_url(url) : Discourse.store.cdn_url(url), + url: secure_media ? Upload.secure_media_url_from_upload_url(url) : Discourse.store.cdn_url(url), short_path: Upload.short_path(sha1: sha1, extension: extension), base62_sha1: Upload.base62_sha1(sha1) } @@ -84,10 +84,6 @@ module PrettyText result end - def secure_media_url(url) - url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads") - end - def get_topic_info(topic_id) return unless topic_id.is_a?(Integer) # TODO this only handles public topics, secured one do not get this diff --git a/lib/url_helper.rb b/lib/url_helper.rb index 51e448e8566..3345a37fd90 100644 --- a/lib/url_helper.rb +++ b/lib/url_helper.rb @@ -57,8 +57,7 @@ class UrlHelper end def self.secure_proxy_without_cdn(url) - url = url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads") - self.absolute(url, nil) + self.absolute(Upload.secure_media_url_from_upload_url(url), nil) end # Prevents double URL encode diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb index fa37da185c6..3371a73ae4f 100644 --- a/spec/jobs/pull_hotlinked_images_spec.rb +++ b/spec/jobs/pull_hotlinked_images_spec.rb @@ -125,6 +125,19 @@ describe Jobs::PullHotlinkedImages do expect(post.uploads).to contain_exactly(upload) end + context "when secure media enabled" do + it "doesnt redownload the secure upload" do + enable_secure_media + upload = Fabricate(:upload_s3, secure: true) + stub_s3(upload) + url = Upload.secure_media_url_from_upload_url(upload.url) + url = Discourse.base_url + url + post = Fabricate(:post, raw: "") + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .not_to change { Upload.count } + end + end + it 'replaces markdown image' do post = Fabricate(:post, raw: <<~MD) [![some test](#{image_url})](https://somelink.com) @@ -260,6 +273,16 @@ describe Jobs::PullHotlinkedImages do expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false) end + context "when secure media enabled" do + it 'should return false for secure-media-upload url' do + enable_secure_media + upload = Fabricate(:upload_s3, secure: true) + stub_s3(upload) + url = Upload.secure_media_url_from_upload_url(upload.url) + expect(subject.should_download_image?(url)).to eq(false) + end + end + it 'should return true for optimized' do src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image)) expect(subject.should_download_image?(src)).to eq(true) @@ -338,4 +361,20 @@ describe Jobs::PullHotlinkedImages do end end + def enable_secure_media + SiteSetting.enable_s3_uploads = true + SiteSetting.s3_upload_bucket = "s3-upload-bucket" + SiteSetting.s3_access_key_id = "some key" + SiteSetting.s3_secret_access_key = "some secrets3_region key" + SiteSetting.secure_media = true + end + + def stub_s3(upload) + stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/") + + stub_request( + :put, + "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl" + ) + end end