FIX: Mitigate issue where legacy pre-secure hotlinked media would not be redownloaded (#8802)

Basically, say you had already downloaded a certain image from a certain URL
using pull_hotlinked_images and the onebox. The upload would be stored
by its sha as an upload record. Whenever you linked to the same URL again
in a post (e.g. in our case an og:image on review.discourse) we would
would reuse the original upload record because of the sha1.

However when you turned on secure media this could cause problems as
the first post that uses that upload after secure media is enabled
will set the access control post for the upload to the new post.
Then if the post is deleted every single onebox/link to that same image
URL will fail forever with 403 as the secure-media-uploads URL fails
if the access control post has been deleted.

To fix this when cooking posts and pulling hotlinked images, we only
allow using an original upload by URL if its access control post
matches the current post, and if the original_sha1 is filled in,
meaning it was uploaded AFTER secure media was enabled. otherwise
we just redownload the media again to be safe, as the URL will always
be new then.
This commit is contained in:
Martin Brennan 2020-01-29 10:11:38 +10:00 committed by GitHub
parent 20e7fb1c95
commit ab3bda6cd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 183 additions and 28 deletions

View File

@ -56,7 +56,7 @@ module Jobs
src = original_src = node['src'] || node['href']
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
if should_download_image?(src)
if should_download_image?(src, post)
begin
# have we already downloaded that file?
schemeless_src = remove_scheme(original_src)
@ -89,6 +89,7 @@ module Jobs
has_new_broken_image = true
end
end
# have we successfully downloaded that file?
if downloaded_urls[src].present?
escaped_src = Regexp.escape(original_src)
@ -163,7 +164,7 @@ module Jobs
doc.css(".lightbox img[src]")
end
def should_download_image?(src)
def should_download_image?(src, post = nil)
# make sure we actually have a url
return false unless src.present?
@ -174,7 +175,15 @@ module Jobs
# Someone could hotlink a file from a different site on the same CDN,
# so check whether we have it in this database
return !Upload.get_from_url(src)
#
# if the upload already exists and is attached to a different post,
# or the original_sha1 is missing meaning it was created before secure
# media was enabled, then we definitely want to redownload again otherwise
# we end up reusing existing uploads which may be linked to many posts
# already.
upload = Upload.consider_for_reuse(Upload.get_from_url(src), post)
return !upload.present?
end
# Don't download non-local images unless site setting enabled

View File

@ -121,6 +121,12 @@ class Upload < ActiveRecord::Base
self.class.short_path(sha1: self.sha1, extension: self.extension)
end
def self.consider_for_reuse(upload, post)
return upload if !SiteSetting.secure_media? || upload.blank? || post.blank?
return nil if upload.access_control_post_id != post.id || upload.original_sha1.blank?
upload
end
def self.secure_media_url?(url)
url.include?(SECURE_MEDIA_ROUTE)
end

View File

@ -330,7 +330,13 @@ class CookedPostProcessor
img["height"] = height
end
if upload = Upload.get_from_url(src)
# if the upload already exists and is attached to a different post,
# or the original_sha1 is missing meaning it was created before secure
# media was enabled. we want to re-thumbnail and re-optimize in this case
# to avoid using uploads linked to many other posts
upload = Upload.consider_for_reuse(Upload.get_from_url(src), @post)
if upload.present?
upload.create_thumbnail!(width, height, crop: crop)
each_responsive_ratio do |ratio|
@ -351,7 +357,9 @@ class CookedPostProcessor
add_lightbox!(img, original_width, original_height, upload, cropped: crop)
end
optimize_image!(img, upload, cropped: crop) if upload
if upload.present?
optimize_image!(img, upload, cropped: crop)
end
end
def loading_image(upload)

View File

@ -504,6 +504,7 @@ describe CookedPostProcessor do
end
context "s3_uploads" do
let(:upload) { Fabricate(:secure_upload_s3) }
before do
s3_setup
stored_path = Discourse.store.get_path_for_upload(upload)
@ -516,13 +517,8 @@ describe CookedPostProcessor do
)
stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
OptimizedImage.expects(:resize).returns(true)
FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
SiteSetting.login_required = true
SiteSetting.secure_media = true
upload.update_column(:secure, true)
end
let(:optimized_size) { "600x500" }
@ -531,15 +527,43 @@ describe CookedPostProcessor do
Fabricate(:post, raw: "![large.png|#{optimized_size}](#{upload.short_url})")
end
it "handles secure images with the correct lightbox link href" do
cpp.post_process
expect(cpp.html).to match_html <<~HTML
let(:cooked_html) do
<<~HTML
<p><div class="lightbox-wrapper"><a class="lightbox" href="//test.localhost/secure-media-uploads/original/1X/#{upload.sha1}.png" data-download-href="//test.localhost/uploads/short-url/#{upload.base62_sha1}.unknown?dl=1" title="large.png"><img src="" alt="large.png" data-base62-sha1="#{upload.base62_sha1}" width="600" height="500"><div class="meta">
<svg class="fa d-icon d-icon-far-image svg-icon" aria-hidden="true"><use xlink:href="#far-image"></use></svg><span class="filename">large.png</span><span class="informations">1750×2000 1.21 KB</span><svg class="fa d-icon d-icon-discourse-expand svg-icon" aria-hidden="true"><use xlink:href="#discourse-expand"></use></svg>
</div></a></div></p>
HTML
end
context "when the upload is attached to the correct post" do
before do
OptimizedImage.expects(:resize).returns(true)
FileStore::BaseStore.any_instance.expects(:get_depth_for).returns(0)
Discourse.store.class.any_instance.expects(:has_been_uploaded?).at_least_once.returns(true)
upload.update(secure: true, access_control_post: post)
end
it "handles secure images with the correct lightbox link href" do
cpp.post_process
expect(cpp.html).to match_html cooked_html
end
end
context "when the upload is attached to a different post" do
before do
FastImage.size(upload.url)
upload.update(secure: true, access_control_post: Fabricate(:post))
end
it "does not create thumbnails or optimize images" do
CookedPostProcessor.any_instance.expects(:optimize_image!).never
Upload.any_instance.expects(:create_thumbnail!).never
cpp.post_process
expect(cpp.html).not_to match_html cooked_html
end
end
end
end

View File

@ -51,3 +51,9 @@ Fabricator(:upload_s3, from: :upload) do
end
end
end
Fabricator(:secure_upload_s3, from: :upload_s3) do
secure { true }
sha1 { SecureRandom.hex(20) }
original_sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
end

View File

@ -125,17 +125,56 @@ describe Jobs::PullHotlinkedImages do
expect(post.uploads).to contain_exactly(upload)
end
context "when secure media enabled" do
context "when secure media enabled for an upload that has already been downloaded and exists" do
it "doesnt redownload the secure upload" do
enable_secure_media
upload = Fabricate(:upload_s3, secure: true)
upload = Fabricate(:secure_upload_s3, secure: true)
stub_s3(upload)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: post)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.not_to change { Upload.count }
end
context "when the upload original_sha1 is missing" do
it "redownloads the upload" do
enable_secure_media
upload = Fabricate(:upload_s3, secure: true)
stub_s3(upload)
Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: post)
FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
# without this we get an infinite hang...
Post.any_instance.stubs(:trigger_post_process)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.to change { Upload.count }.by(1)
end
end
context "when the upload access_control_post is different to the current post" do
it "redownloads the upload" do
enable_secure_media
upload = Fabricate(:secure_upload_s3, secure: true)
stub_s3(upload)
Upload.stubs(:signed_url_from_secure_media_url).returns(upload.url)
url = Upload.secure_media_url_from_upload_url(upload.url)
url = Discourse.base_url + url
post = Fabricate(:post, raw: "<img src='#{url}'>")
upload.update(access_control_post: Fabricate(:post))
FileStore::S3Store.any_instance.stubs(:store_upload).returns(upload.url)
# without this we get an infinite hang...
Post.any_instance.stubs(:trigger_post_process)
expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) }
.to change { Upload.count }.by(1)
end
end
end
it 'replaces markdown image' do
@ -376,5 +415,7 @@ describe Jobs::PullHotlinkedImages do
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png"))
# stub_request(:get, /#{SiteSetting.s3_upload_bucket}\.s3\.amazonaws\.com/)
end
end

View File

@ -131,6 +131,11 @@ describe TopicConverter do
public_reply.link_post_uploads
public_reply.update_uploads_secure_status
# we need to do this otherwise we get infinite loop shenanigans, as the pull hotlinked
# images is never satisfied and keeps calling cooked post processor which in turn runs
# pull hotlinked images
Jobs::PullHotlinkedImages.stubs(:new).returns(Helpers::StubbedJob.new)
expect(public_reply.uploads[0].secure).to eq(false)
public_topic.convert_to_private_message(admin)
expect(public_topic.reload.posts.find(public_reply.id).uploads[0].secure).to eq(true)

View File

@ -288,6 +288,53 @@ describe Upload do
end
end
describe ".consider_for_reuse" do
let(:post) { Fabricate(:post) }
let(:upload) { Fabricate(:upload) }
it "returns nil when the provided upload is blank" do
expect(Upload.consider_for_reuse(nil, post)).to eq(nil)
end
it "returns the upload when secure media is disabled" do
expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
end
context "when secure media enabled" do
before do
enable_secure_media
end
context "when the upload access control post is != to the provided post" do
before do
upload.update(access_control_post_id: Fabricate(:post).id)
end
it "returns nil" do
expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
end
end
context "when the upload original_sha1 is blank (pre-secure-media upload)" do
before do
upload.update(original_sha1: nil, access_control_post: post)
end
it "returns nil" do
expect(Upload.consider_for_reuse(upload, post)).to eq(nil)
end
end
context "when the upload original_sha1 is present and access control post is correct" do
let(:upload) { Fabricate(:secure_upload_s3, access_control_post: post) }
it "returns the upload" do
expect(Upload.consider_for_reuse(upload, post)).to eq(upload)
end
end
end
end
describe '.update_secure_status' do
it "respects the secure_override_value parameter if provided" do
upload.update!(secure: true)
@ -344,18 +391,7 @@ describe Upload do
context "secure media enabled" do
before do
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
SiteSetting.secure_media = true
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
enable_secure_media
end
it 'does not mark an image upload as not secure when there is no access control post id, to avoid unintentional exposure' do
@ -399,4 +435,19 @@ describe Upload do
expect(upload2.reload.extension).to eq("png")
end
end
def enable_secure_media
SiteSetting.enable_s3_uploads = true
SiteSetting.s3_upload_bucket = "s3-upload-bucket"
SiteSetting.s3_access_key_id = "some key"
SiteSetting.s3_secret_access_key = "some secrets3_region key"
SiteSetting.secure_media = true
stub_request(:head, "https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/")
stub_request(
:put,
"https://#{SiteSetting.s3_upload_bucket}.s3.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}?acl"
)
end
end

View File

@ -147,4 +147,9 @@ module Helpers
ActionController::Base.config.relative_url_root = old_root
end
end
class StubbedJob
def initialize; end
def perform(args); end
end
end