FIX: Resolve pull hotlinked image and broken link issues for secure media URLs (#8777)
When pull_hotlinked_images tried to run on posts with secure media (which had already been downloaded from external sources) we were getting a 404 when trying to download the image because the secure endpoint doesn't allow anon downloads. Also, we were getting into an infinite loop of pull_hotlinked_images because the job didn't consider the secure media URLs as "downloaded" already so it kept trying to download them over and over. In this PR I have also refactored secure-media-upload URL checks and mutations into single source of truth in Upload, adding a SECURE_MEDIA_ROUTE constant to check URLs against too.
This commit is contained in:
parent
e4b8121650
commit
45b37a8bd1
|
@ -62,6 +62,11 @@ module Jobs
|
||||||
schemeless_src = remove_scheme(original_src)
|
schemeless_src = remove_scheme(original_src)
|
||||||
|
|
||||||
unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src)
|
unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src)
|
||||||
|
|
||||||
|
# secure-media-uploads endpoint prevents anonymous downloads, so we
|
||||||
|
# need the presigned S3 URL here
|
||||||
|
src = Upload.signed_url_from_secure_media_url(src) if Upload.secure_media_url?(src)
|
||||||
|
|
||||||
if hotlinked = download(src)
|
if hotlinked = download(src)
|
||||||
if File.size(hotlinked.path) <= @max_size
|
if File.size(hotlinked.path) <= @max_size
|
||||||
filename = File.basename(URI.parse(src).path)
|
filename = File.basename(URI.parse(src).path)
|
||||||
|
@ -162,8 +167,9 @@ module Jobs
|
||||||
# make sure we actually have a url
|
# make sure we actually have a url
|
||||||
return false unless src.present?
|
return false unless src.present?
|
||||||
|
|
||||||
# If file is on the forum or CDN domain
|
# If file is on the forum or CDN domain or already has the
|
||||||
if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i
|
# secure media url
|
||||||
|
if Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i || Upload.secure_media_url?(src)
|
||||||
return false if src =~ /\/images\/emoji\//
|
return false if src =~ /\/images\/emoji\//
|
||||||
|
|
||||||
# Someone could hotlink a file from a different site on the same CDN,
|
# Someone could hotlink a file from a different site on the same CDN,
|
||||||
|
|
|
@ -307,7 +307,7 @@ class Post < ActiveRecord::Base
|
||||||
each_upload_url do |url|
|
each_upload_url do |url|
|
||||||
uri = URI.parse(url)
|
uri = URI.parse(url)
|
||||||
if FileHelper.is_supported_media?(File.basename(uri.path))
|
if FileHelper.is_supported_media?(File.basename(uri.path))
|
||||||
raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/secure-media-uploads")
|
raw = raw.sub(Discourse.store.s3_upload_host, "#{Discourse.base_url}/#{Upload::SECURE_MEDIA_ROUTE}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -9,6 +9,7 @@ class Upload < ActiveRecord::Base
|
||||||
SHA1_LENGTH = 40
|
SHA1_LENGTH = 40
|
||||||
SEEDED_ID_THRESHOLD = 0
|
SEEDED_ID_THRESHOLD = 0
|
||||||
URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
|
URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
|
||||||
|
SECURE_MEDIA_ROUTE = "secure-media-uploads".freeze
|
||||||
|
|
||||||
belongs_to :user
|
belongs_to :user
|
||||||
belongs_to :access_control_post, class_name: 'Post'
|
belongs_to :access_control_post, class_name: 'Post'
|
||||||
|
@ -120,6 +121,19 @@ class Upload < ActiveRecord::Base
|
||||||
self.class.short_path(sha1: self.sha1, extension: self.extension)
|
self.class.short_path(sha1: self.sha1, extension: self.extension)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.secure_media_url?(url)
|
||||||
|
url.include?(SECURE_MEDIA_ROUTE)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.signed_url_from_secure_media_url(url)
|
||||||
|
secure_upload_s3_path = url.sub(Discourse.base_url, "").sub("/#{SECURE_MEDIA_ROUTE}/", "")
|
||||||
|
Discourse.store.signed_url_for_path(secure_upload_s3_path)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.secure_media_url_from_upload_url(url)
|
||||||
|
url.sub(SiteSetting.Upload.absolute_base_url, "/#{SECURE_MEDIA_ROUTE}")
|
||||||
|
end
|
||||||
|
|
||||||
def self.short_path(sha1:, extension:)
|
def self.short_path(sha1:, extension:)
|
||||||
@url_helpers ||= Rails.application.routes.url_helpers
|
@url_helpers ||= Rails.application.routes.url_helpers
|
||||||
|
|
||||||
|
|
|
@ -284,9 +284,8 @@ class CookedPostProcessor
|
||||||
|
|
||||||
# we can't direct FastImage to our secure-media-uploads url because it bounces
|
# we can't direct FastImage to our secure-media-uploads url because it bounces
|
||||||
# anonymous requests with a 404 error
|
# anonymous requests with a 404 error
|
||||||
if url&.include?("/secure-media-uploads/")
|
if url && Upload.secure_media_url?(url)
|
||||||
secure_upload_s3_path = absolute_url.sub(Discourse.base_url, "").sub("/secure-media-uploads/", "")
|
absolute_url = Upload.signed_url_from_secure_media_url(absolute_url)
|
||||||
absolute_url = Discourse.store.signed_url_for_path(secure_upload_s3_path)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return unless is_valid_image_url?(absolute_url)
|
return unless is_valid_image_url?(absolute_url)
|
||||||
|
|
|
@ -288,7 +288,7 @@ module Email
|
||||||
|
|
||||||
def replace_secure_media_urls
|
def replace_secure_media_urls
|
||||||
@fragment.css('[href]').each do |a|
|
@fragment.css('[href]').each do |a|
|
||||||
if a['href'][/secure-media-uploads/]
|
if a['href'][/#{Upload::SECURE_MEDIA_ROUTE}/]
|
||||||
a.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
a.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
||||||
a.remove
|
a.remove
|
||||||
end
|
end
|
||||||
|
@ -296,7 +296,7 @@ module Email
|
||||||
|
|
||||||
@fragment.search('img').each do |img|
|
@fragment.search('img').each do |img|
|
||||||
next unless img['src']
|
next unless img['src']
|
||||||
if img['src'][/secure-media-uploads/]
|
if img['src'][/#{Upload::SECURE_MEDIA_ROUTE}/]
|
||||||
img.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
img.add_next_sibling "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
||||||
img.remove
|
img.remove
|
||||||
end
|
end
|
||||||
|
|
|
@ -388,7 +388,7 @@ module PrettyText
|
||||||
|
|
||||||
def self.strip_secure_media(doc)
|
def self.strip_secure_media(doc)
|
||||||
doc.css("a[href]").each do |a|
|
doc.css("a[href]").each do |a|
|
||||||
if a["href"].include?("/secure-media-uploads/") && FileHelper.is_supported_media?(a["href"])
|
if a["href"].include?("/#{Upload::SECURE_MEDIA_ROUTE}/") && FileHelper.is_supported_media?(a["href"])
|
||||||
target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a
|
target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a
|
||||||
target.replace "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
target.replace "<p class='secure-media-notice'>#{I18n.t("emails.secure_media_placeholder")}</p>"
|
||||||
end
|
end
|
||||||
|
|
|
@ -72,7 +72,7 @@ module PrettyText
|
||||||
|
|
||||||
short_urls.each do |short_url|
|
short_urls.each do |short_url|
|
||||||
result[short_url] = {
|
result[short_url] = {
|
||||||
url: secure_media ? secure_media_url(url) : Discourse.store.cdn_url(url),
|
url: secure_media ? Upload.secure_media_url_from_upload_url(url) : Discourse.store.cdn_url(url),
|
||||||
short_path: Upload.short_path(sha1: sha1, extension: extension),
|
short_path: Upload.short_path(sha1: sha1, extension: extension),
|
||||||
base62_sha1: Upload.base62_sha1(sha1)
|
base62_sha1: Upload.base62_sha1(sha1)
|
||||||
}
|
}
|
||||||
|
@ -84,10 +84,6 @@ module PrettyText
|
||||||
result
|
result
|
||||||
end
|
end
|
||||||
|
|
||||||
def secure_media_url(url)
|
|
||||||
url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads")
|
|
||||||
end
|
|
||||||
|
|
||||||
def get_topic_info(topic_id)
|
def get_topic_info(topic_id)
|
||||||
return unless topic_id.is_a?(Integer)
|
return unless topic_id.is_a?(Integer)
|
||||||
# TODO this only handles public topics, secured one do not get this
|
# TODO this only handles public topics, secured one do not get this
|
||||||
|
|
|
@ -57,8 +57,7 @@ class UrlHelper
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.secure_proxy_without_cdn(url)
|
def self.secure_proxy_without_cdn(url)
|
||||||
url = url.sub(SiteSetting.Upload.absolute_base_url, "/secure-media-uploads")
|
self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
|
||||||
self.absolute(url, nil)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Prevents double URL encode
|
# Prevents double URL encode
|
||||||
|
|
|
@ -125,6 +125,19 @@ describe Jobs::PullHotlinkedImages do
|
||||||
expect(post.uploads).to contain_exactly(upload)
|
expect(post.uploads).to contain_exactly(upload)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when secure media enabled" do
|
||||||
|
it "doesnt redownload the secure upload" do
|
||||||
|
enable_secure_media
|
||||||
|
upload = Fabricate(:upload_s3, secure: true)
|
||||||
|
stub_s3(upload)
|
||||||
|
url = Upload.secure_media_url_from_upload_url(upload.url)
|
||||||
|
url = Discourse.base_url + url
|
||||||
|
post = Fabricate(:post, raw: "<img src='#{url}'>")
|
||||||
|
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
|
||||||
|
.not_to change { Upload.count }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it 'replaces markdown image' do
|
it 'replaces markdown image' do
|
||||||
post = Fabricate(:post, raw: <<~MD)
|
post = Fabricate(:post, raw: <<~MD)
|
||||||
[![some test](#{image_url})](https://somelink.com)
|
[![some test](#{image_url})](https://somelink.com)
|
||||||
|
@ -260,6 +273,16 @@ describe Jobs::PullHotlinkedImages do
|
||||||
expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false)
|
expect(subject.should_download_image?(Fabricate(:upload).url)).to eq(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when secure media enabled" do
|
||||||
|
it 'should return false for secure-media-upload url' do
|
||||||
|
enable_secure_media
|
||||||
|
upload = Fabricate(:upload_s3, secure: true)
|
||||||
|
stub_s3(upload)
|
||||||
|
url = Upload.secure_media_url_from_upload_url(upload.url)
|
||||||
|
expect(subject.should_download_image?(url)).to eq(false)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it 'should return true for optimized' do
|
it 'should return true for optimized' do
|
||||||
src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image))
|
src = Discourse.store.get_path_for_optimized_image(Fabricate(:optimized_image))
|
||||||
expect(subject.should_download_image?(src)).to eq(true)
|
expect(subject.should_download_image?(src)).to eq(true)
|
||||||
|
@ -338,4 +361,20 @@ describe Jobs::PullHotlinkedImages do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def enable_secure_media
|
||||||
|
SiteSetting.enable_s3_uploads = true
|
||||||
|
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
|
||||||
|
SiteSetting.s3_access_key_id = "some key"
|
||||||
|
SiteSetting.s3_secret_access_key = "some secrets3_region key"
|
||||||
|
SiteSetting.secure_media = true
|
||||||
|
end
|
||||||
|
|
||||||
|
def stub_s3(upload)
|
||||||
|
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
|
||||||
|
|
||||||
|
stub_request(
|
||||||
|
:put,
|
||||||
|
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
|
||||||
|
)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue