FIX: Resolve pull hotlinked image and broken link issues for secure media URLs (#8777)

When pull_hotlinked_images tried to run on posts with secure media (which had already been downloaded from external sources) we were getting a 404 when trying to download the image because the secure endpoint doesn't allow anon downloads.

Also, we were getting into an infinite loop of pull_hotlinked_images because the job didn't consider the secure media URLs as "downloaded" already so it kept trying to download them over and over.

In this PR I have also refactored secure-media-upload URL checks and mutations into single source of truth in Upload, adding a SECURE_MEDIA_ROUTE constant to check URLs against too.
This commit is contained in:
Martin Brennan 2020-01-24 11:59:30 +10:00 committed by GitHub
parent e4b8121650
commit 45b37a8bd1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 69 additions and 16 deletions

View File

@ -62,6 +62,11 @@ module Jobs
schemeless_src = remove_scheme(original_src) schemeless_src = remove_scheme(original_src)
unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src) unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src)
# secure-media-uploads endpoint prevents anonymous downloads, so we
# need the presigned S3 URL here
src = Upload.signed_url_from_secure_media_url(src) if Upload.secure_media_url?(src)
if hotlinked = download(src) if hotlinked = download(src)
if File.size(hotlinked.path) <= @max_size if File.size(hotlinked.path) <= @max_size
filename = File.basename(URI.parse(src).path) filename = File.basename(URI.parse(src).path)
@ -162,8 +167,9 @@ module Jobs
# make sure we actually have a url # make sure we actually have a url
return false unless src.present? return false unless src.present?
# If file is on the forum or CDN domain # If file is on the forum or CDN domain or already has the
if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i # secure media url
if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i || Upload.secure_media_url?(src)
return false if src =~ /\/images\/emoji\// return false if src =~ /\/images\/emoji\//
# Someone could hotlink a file from a different site on the same CDN, # Someone could hotlink a file from a different site on the same CDN,

View File

@ -307,7 +307,7 @@ class Post < ActiveRecord::Base
each_upload_url do |url| each_upload_url do |url|
uri = URI.parse(url) uri = URI.parse(url)
if FileHelper.is_supported_media?(File.basename(uri.path)) if FileHelper.is_supported_media?(File.basename(uri.path))
raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/secure-media-uploads") raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/#{Upload::SECURE_MEDIA_ROUTE}")
end end
end end
end end

View File

@ -9,6 +9,7 @@ class Upload < ActiveRecord::Base
SHA1_LENGTH = 40 SHA1_LENGTH = 40
SEEDED_ID_THRESHOLD = 0 SEEDED_ID_THRESHOLD = 0
URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/ URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
SECURE_MEDIA_ROUTE = "secure-media-uploads".freeze
belongs_to :user belongs_to :user
belongs_to :access_control_post, class_name: 'Post' belongs_to :access_control_post, class_name: 'Post'
@ -120,6 +121,19 @@ class Upload < ActiveRecord::Base
self.class.short_path(sha1: self.sha1, extension: self.extension) self.class.short_path(sha1: self.sha1, extension: self.extension)
end end
def self.secure_media_url?(url)
url.include?(SECURE_MEDIA_ROUTE)
end
def self.signed_url_from_secure_media_url(url)
secure_upload_s3_path = url.sub(Discourse.base_url, "").sub("/#{SECURE_MEDIA_ROUTE}/", "")
Discourse.store.signed_url_for_path(secure_upload_s3_path)
end
def self.secure_media_url_from_upload_url(url)
url.sub(SiteSetting.Upload.absolute_base_url, "/#{SECURE_MEDIA_ROUTE}")
end
def self.short_path(sha1:, extension:) def self.short_path(sha1:, extension:)
@url_helpers ||= Rails.application.routes.url_helpers @url_helpers ||= Rails.application.routes.url_helpers

View File

@ -284,9 +284,8 @@ class CookedPostProcessor
# we can't direct FastImage to our secure-media-uploads url because it bounces # we can't direct FastImage to our secure-media-uploads url because it bounces
# anonymous requests with a 404 error # anonymous requests with a 404 error
if url&.include?("/secure-media-uploads/") if url && Upload.secure_media_url?(url)
secure_upload_s3_path = absolute_url.sub(Discourse.base_url, "").sub("/secure-media-uploads/", "") absolute_url = Upload.signed_url_from_secure_media_url(absolute_url)
absolute_url = Discourse.store.signed_url_for_path(secure_upload_s3_path)
end end
return unless is_valid_image_url?(absolute_url) return unless is_valid_image_url?(absolute_url)

View File

@ -288,7 +288,7 @@ module Email
def replace_secure_media_urls def replace_secure_media_urls
@fragment.css('[href]').each do |a| @fragment.css('[href]').each do |a|
if a['href'][/secure-media-uploads/] if a['href'][/#{Upload::SECURE_MEDIA_ROUTE}/]
a.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>" a.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
a.remove a.remove
end end
@ -296,7 +296,7 @@ module Email
@fragment.search('img').each do |img| @fragment.search('img').each do |img|
next unless img['src'] next unless img['src']
if img['src'][/secure-media-uploads/] if img['src'][/#{Upload::SECURE_MEDIA_ROUTE}/]
img.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>" img.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
img.remove img.remove
end end

View File

@ -388,7 +388,7 @@ module PrettyText
def self.strip_secure_media(doc) def self.strip_secure_media(doc)
doc.css("a[href]").each do |a| doc.css("a[href]").each do |a|
if a["href"].include?("/secure-media-uploads/") && FileHelper.is_supported_media?(a["href"]) if a["href"].include?("/#{Upload::SECURE_MEDIA_ROUTE}/") && FileHelper.is_supported_media?(a["href"])
target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a
target.replace "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>" target.replace "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
end end

View File

@ -72,7 +72,7 @@ module PrettyText
short_urls.each do |short_url| short_urls.each do |short_url|
result[short_url] = { result[short_url] = {
url: secure_media ? secure_media_url(url) : Discourse.store.cdn_url(url), url: secure_media ? Upload.secure_media_url_from_upload_url(url) : Discourse.store.cdn_url(url),
short_path: Upload.short_path(sha1: sha1, extension: extension), short_path: Upload.short_path(sha1: sha1, extension: extension),
base62_sha1: Upload.base62_sha1(sha1) base62_sha1: Upload.base62_sha1(sha1)
} }
@ -84,10 +84,6 @@ module PrettyText
result result
end end
def secure_media_url(url)
url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads")
end
def get_topic_info(topic_id) def get_topic_info(topic_id)
return unless topic_id.is_a?(Integer) return unless topic_id.is_a?(Integer)
# TODO this only handles public topics, secured one do not get this # TODO this only handles public topics, secured one do not get this

View File

@ -57,8 +57,7 @@ class UrlHelper
end end
def self.secure_proxy_without_cdn(url) def self.secure_proxy_without_cdn(url)
url = url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads") self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
self.absolute(url, nil)
end end
# Prevents double URL encode # Prevents double URL encode

View File

@ -125,6 +125,19 @@ describe Jobs::PullHotlinkedImages do
expect(post.uploads).to contain_exactly(upload) expect(post.uploads).to contain_exactly(upload)
end end
context "when secure media enabled" do
it "doesnt redownload the secure upload" do
enable_secure_media
upload = Fabricate(:upload_s3, secure: true)
stub_s3(upload)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.not_to change { Upload.count }
end
end
it 'replaces markdown image' do it 'replaces markdown image' do
post = Fabricate(:post, raw: <<~MD) post = Fabricate(:post, raw: <<~MD)
[![some test](#{image_url})](https://somelink.com) [![some test](#{image_url})](https://somelink.com)
@ -260,6 +273,16 @@ describe Jobs::PullHotlinkedImages do
expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false) expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false)
end end
context "when secure media enabled" do
it 'should return false for secure-media-upload url' do
enable_secure_media
upload = Fabricate(:upload_s3, secure: true)
stub_s3(upload)
url = Upload.secure_media_url_from_upload_url(upload.url)
expect(subject.should_download_image?(url)).to eq(false)
end
end
it 'should return true for optimized' do it 'should return true for optimized' do
src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image)) src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image))
expect(subject.should_download_image?(src)).to eq(true) expect(subject.should_download_image?(src)).to eq(true)
@ -338,4 +361,20 @@ describe Jobs::PullHotlinkedImages do
end end
end end
def enable_secure_media
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
SiteSetting.secure_media = true
end
def stub_s3(upload)
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end
end end