FEATURE: Pull hotlinked images immediately after posting
Previously, with the default `editing_grace_period`, hotlinked images were pulled 5 minutes after a post is created. This delay was added to reduce the chance of automated edits clashing with user edits. This commit refactors things so that we can pull hotlinked images immediately. URLs are immediately updated in the post's `cooked` HTML. The post's raw markdown is updated later, after the `editing_grace_period`. This involves a number of behind-the-scenes changes including: - Schedule Jobs::PullHotlinkedImages immediately after Jobs::ProcessPost. Move scheduling to after the `update_column` call to avoid race conditions - Move raw changes into a separate job, which is delayed until after the ninja-edit window - Move disable_if_low_on_disk_space logic into the `pull_hotlinked_images` job - Move raw-parsing/replacing logic into `InlineUpload` so it can be easily be shared between `UpdateHotlinkedRaw` and `PullUserProfileHotlinkedImages`
This commit is contained in:
parent
0baabafa9d
commit
bf6f8299a7
|
@ -40,6 +40,8 @@ module Jobs
|
|||
end
|
||||
end
|
||||
|
||||
enqueue_pull_hotlinked_images(post) unless args[:skip_pull_hotlinked_images]
|
||||
|
||||
if !post.user&.staff? && !post.user&.staged?
|
||||
s = post.raw
|
||||
s << " #{post.topic.title}" if post.post_number == 1
|
||||
|
@ -60,6 +62,11 @@ module Jobs
|
|||
TopicLink.extract_from(post)
|
||||
QuotedPost.extract_from(post)
|
||||
end
|
||||
|
||||
def enqueue_pull_hotlinked_images(post)
|
||||
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: post.id)
|
||||
Jobs.enqueue(:pull_hotlinked_images, post_id: post.id)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -10,16 +10,13 @@ module Jobs
|
|||
end
|
||||
|
||||
def execute(args)
|
||||
disable_if_low_on_disk_space
|
||||
|
||||
@post_id = args[:post_id]
|
||||
raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank?
|
||||
|
||||
post = Post.find_by(id: @post_id)
|
||||
return if post.blank?
|
||||
return if post.topic.blank?
|
||||
return if post.cook_method == Post.cook_methods[:raw_html]
|
||||
|
||||
raw = post.raw.dup
|
||||
start_raw = raw.dup
|
||||
return if post.nil? || post.topic.nil?
|
||||
|
||||
hotlinked_map = post.post_hotlinked_media.map { |r| [r.url, r] }.to_h
|
||||
|
||||
|
@ -55,30 +52,23 @@ module Jobs
|
|||
changed_hotlink_records = true
|
||||
hotlink_record.save!
|
||||
end
|
||||
|
||||
# have we successfully downloaded that file?
|
||||
if upload = hotlink_record&.upload
|
||||
raw = replace_in_raw(original_src: original_src, upload: upload, raw: raw)
|
||||
end
|
||||
rescue => e
|
||||
raise e if Rails.env.test?
|
||||
log(:error, "Failed to pull hotlinked image (#{download_src}) post: #{@post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
|
||||
end
|
||||
|
||||
# If post changed while we were downloading images, never apply edits
|
||||
post.reload
|
||||
post_changed_elsewhere = (start_raw != post.raw)
|
||||
raw_changed_here = (raw != post.raw)
|
||||
|
||||
if !post_changed_elsewhere && raw_changed_here
|
||||
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
|
||||
post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true)
|
||||
elsif changed_hotlink_records
|
||||
if changed_hotlink_records
|
||||
post.trigger_post_process(
|
||||
bypass_bump: true,
|
||||
skip_pull_hotlinked_images: true # Avoid an infinite loop of job scheduling
|
||||
)
|
||||
end
|
||||
|
||||
if hotlinked_map.size > 0
|
||||
Jobs.cancel_scheduled_job(:update_hotlinked_raw, post_id: post.id)
|
||||
update_raw_delay = SiteSetting.editing_grace_period + 1
|
||||
Jobs.enqueue_in(update_raw_delay, :update_hotlinked_raw, post_id: post.id)
|
||||
end
|
||||
end
|
||||
|
||||
def download(src)
|
||||
|
@ -137,45 +127,6 @@ module Jobs
|
|||
end
|
||||
end
|
||||
|
||||
def replace_in_raw(original_src:, raw:, upload:)
|
||||
raw = raw.dup
|
||||
escaped_src = Regexp.escape(original_src)
|
||||
|
||||
replace_raw = ->(match, match_src, replacement, _index) {
|
||||
if normalize_src(original_src) == normalize_src(match_src)
|
||||
replacement =
|
||||
if replacement.include?(InlineUploads::PLACEHOLDER)
|
||||
replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
|
||||
elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER)
|
||||
replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path)
|
||||
end
|
||||
|
||||
raw = raw.gsub(
|
||||
match,
|
||||
replacement
|
||||
)
|
||||
end
|
||||
}
|
||||
|
||||
# there are 6 ways to insert an image in a post
|
||||
# HTML tag - <img src="http://...">
|
||||
InlineUploads.match_img(raw, external_src: true, &replace_raw)
|
||||
|
||||
# BBCode tag - [img]http://...[/img]
|
||||
InlineUploads.match_bbcode_img(raw, external_src: true, &replace_raw)
|
||||
|
||||
# Markdown linked image - [![alt](http://...)](http://...)
|
||||
# Markdown inline - ![alt](http://...)
|
||||
# Markdown inline - ![](http://... "image title")
|
||||
# Markdown inline - ![alt](http://... "image title")
|
||||
InlineUploads.match_md_inline_img(raw, external_src: true, &replace_raw)
|
||||
|
||||
# Direct link
|
||||
raw.gsub!(/^#{escaped_src}(\s?)$/) { "![](#{upload.short_url})#{$1}" }
|
||||
|
||||
raw
|
||||
end
|
||||
|
||||
def extract_images_from(html)
|
||||
doc = Nokogiri::HTML5::fragment(html)
|
||||
|
||||
|
@ -239,6 +190,30 @@ module Jobs
|
|||
def normalize_src(src)
|
||||
PostHotlinkedMedia.normalize_src(src)
|
||||
end
|
||||
|
||||
def disable_if_low_on_disk_space
|
||||
return if Discourse.store.external?
|
||||
return if !SiteSetting.download_remote_images_to_local
|
||||
return if available_disk_space >= SiteSetting.download_remote_images_threshold
|
||||
|
||||
SiteSetting.download_remote_images_to_local = false
|
||||
|
||||
# log the site setting change
|
||||
reason = I18n.t("disable_remote_images_download_reason")
|
||||
staff_action_logger = StaffActionLogger.new(Discourse.system_user)
|
||||
staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)
|
||||
|
||||
# also send a private message to the site contact user notify_about_low_disk_space
|
||||
notify_about_low_disk_space
|
||||
end
|
||||
|
||||
def notify_about_low_disk_space
|
||||
SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
|
||||
end
|
||||
|
||||
def available_disk_space
|
||||
100 - DiskSpace.percent_free("#{Rails.root}/public/uploads")
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -30,16 +30,16 @@ module Jobs
|
|||
rescue ImageBrokenError
|
||||
broken_image_urls << normalized_src
|
||||
end
|
||||
|
||||
# have we successfully downloaded that file?
|
||||
if upload = downloaded_images[normalized_src]
|
||||
user_profile.bio_raw = replace_in_raw(original_src: original_src, upload: upload, raw: user_profile.bio_raw)
|
||||
end
|
||||
rescue => e
|
||||
raise e if Rails.env.test?
|
||||
log(:error, "Failed to pull hotlinked image (#{download_src}) user: #{@user_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
|
||||
end
|
||||
|
||||
user_profile.bio_raw = InlineUploads.replace_hotlinked_image_urls(raw: user_profile.bio_raw) do |match_src|
|
||||
normalized_match_src = PostHotlinkedMedia.normalize_src(match_src)
|
||||
downloaded_images[normalized_match_src]
|
||||
end
|
||||
|
||||
user_profile.skip_pull_hotlinked_image = true
|
||||
user_profile.save!
|
||||
end
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
class UpdateHotlinkedRaw < ::Jobs::Base
|
||||
sidekiq_options queue: 'low'
|
||||
|
||||
def execute(args)
|
||||
@post_id = args[:post_id]
|
||||
raise Discourse::InvalidParameters.new(:post_id) if @post_id.blank?
|
||||
|
||||
post = Post.find_by(id: @post_id)
|
||||
return if post.nil?
|
||||
return if post.cook_method == Post.cook_methods[:raw_html]
|
||||
|
||||
hotlinked_map = post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h
|
||||
|
||||
raw = InlineUploads.replace_hotlinked_image_urls(raw: post.raw) do |match_src|
|
||||
normalized_match_src = PostHotlinkedMedia.normalize_src(match_src)
|
||||
hotlinked_map[normalized_match_src]&.upload
|
||||
end
|
||||
|
||||
if post.raw != raw
|
||||
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
|
||||
post.revise(Discourse.system_user, changes, bypass_bump: true, skip_staff_log: true)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -214,6 +214,40 @@ class InlineUploads
|
|||
end
|
||||
end
|
||||
|
||||
def self.replace_hotlinked_image_urls(raw:, &blk)
|
||||
replace = Proc.new do |match, match_src, replacement, _index|
|
||||
upload = blk.call(match_src)
|
||||
next if !upload
|
||||
|
||||
replacement =
|
||||
if replacement.include?(InlineUploads::PLACEHOLDER)
|
||||
replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
|
||||
elsif replacement.include?(InlineUploads::PATH_PLACEHOLDER)
|
||||
replacement.sub(InlineUploads::PATH_PLACEHOLDER, upload.short_path)
|
||||
end
|
||||
|
||||
raw = raw.gsub(
|
||||
match,
|
||||
replacement
|
||||
)
|
||||
end
|
||||
|
||||
# there are 6 ways to insert an image in a post
|
||||
# HTML tag - <img src="http://...">
|
||||
InlineUploads.match_img(raw, external_src: true, &replace)
|
||||
|
||||
# BBCode tag - [img]http://...[/img]
|
||||
InlineUploads.match_bbcode_img(raw, external_src: true, &replace)
|
||||
|
||||
# Markdown linked image - [![alt](http://...)](http://...)
|
||||
# Markdown inline - ![alt](http://...)
|
||||
# Markdown inline - ![](http://... "image title")
|
||||
# Markdown inline - ![alt](http://... "image title")
|
||||
InlineUploads.match_md_inline_img(raw, external_src: true, &replace)
|
||||
|
||||
raw
|
||||
end
|
||||
|
||||
def self.matched_uploads(node)
|
||||
upload_path = Discourse.store.upload_path
|
||||
base_url = Discourse.base_url.sub(/https?:\/\//, "(https?://)")
|
||||
|
|
|
@ -47,7 +47,6 @@ class CookedPostProcessor
|
|||
remove_user_ids
|
||||
update_post_image
|
||||
enforce_nofollow
|
||||
pull_hotlinked_images
|
||||
grant_badges
|
||||
@post.link_post_uploads(fragments: @doc)
|
||||
DiscourseEvent.trigger(:post_process_cooked, @doc, @post)
|
||||
|
@ -361,41 +360,6 @@ class CookedPostProcessor
|
|||
PrettyText.add_rel_attributes_to_user_content(@doc, add_nofollow)
|
||||
end
|
||||
|
||||
def pull_hotlinked_images
|
||||
return if @opts[:skip_pull_hotlinked_images]
|
||||
# have we enough disk space?
|
||||
disable_if_low_on_disk_space # But still enqueue the job
|
||||
# make sure no other job is scheduled
|
||||
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
|
||||
# schedule the job
|
||||
delay = SiteSetting.editing_grace_period + 1
|
||||
Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id)
|
||||
end
|
||||
|
||||
def disable_if_low_on_disk_space
|
||||
return if Discourse.store.external?
|
||||
return if !SiteSetting.download_remote_images_to_local
|
||||
return if available_disk_space >= SiteSetting.download_remote_images_threshold
|
||||
|
||||
SiteSetting.download_remote_images_to_local = false
|
||||
|
||||
# log the site setting change
|
||||
reason = I18n.t("disable_remote_images_download_reason")
|
||||
staff_action_logger = StaffActionLogger.new(Discourse.system_user)
|
||||
staff_action_logger.log_site_setting_change("download_remote_images_to_local", true, false, details: reason)
|
||||
|
||||
# also send a private message to the site contact user notify_about_low_disk_space
|
||||
notify_about_low_disk_space
|
||||
end
|
||||
|
||||
def notify_about_low_disk_space
|
||||
SystemMessage.create_from_system_user(Discourse.site_contact_user, :download_remote_images_disabled)
|
||||
end
|
||||
|
||||
def available_disk_space
|
||||
100 - DiskSpace.percent_free("#{Rails.root}/public/uploads")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def post_process_images
|
||||
|
|
|
@ -88,4 +88,35 @@ describe Jobs::ProcessPost do
|
|||
expect(post.topic.reload.excerpt).to eq("Some OP content")
|
||||
end
|
||||
end
|
||||
|
||||
context "#enqueue_pull_hotlinked_images" do
|
||||
fab!(:post) { Fabricate(:post, created_at: 20.days.ago) }
|
||||
let(:job) { Jobs::ProcessPost.new }
|
||||
|
||||
it "runs even when download_remote_images_to_local is disabled" do
|
||||
# We want to run it to pull hotlinked optimized images
|
||||
SiteSetting.download_remote_images_to_local = false
|
||||
expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do
|
||||
job.execute({ post_id: post.id })
|
||||
end
|
||||
end
|
||||
|
||||
context "when download_remote_images_to_local? is enabled" do
|
||||
before do
|
||||
SiteSetting.download_remote_images_to_local = true
|
||||
end
|
||||
|
||||
it "enqueues" do
|
||||
expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }) do
|
||||
job.execute({ post_id: post.id })
|
||||
end
|
||||
end
|
||||
|
||||
it "does not run when requested to skip" do
|
||||
job.execute({ post_id: post.id, skip_pull_hotlinked_images: true })
|
||||
expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
|
|
@ -67,6 +67,21 @@ describe Jobs::PullHotlinkedImages do
|
|||
expect(post.reload.raw).to eq("<img src=\"#{Upload.last.short_url}\">")
|
||||
end
|
||||
|
||||
it 'enqueues raw replacement job with a delay' do
|
||||
Jobs.run_later!
|
||||
|
||||
post = Fabricate(:post, raw: "<img src='#{image_url}'>")
|
||||
stub_image_size
|
||||
|
||||
freeze_time
|
||||
Jobs.expects(:cancel_scheduled_job).with(:update_hotlinked_raw, post_id: post.id).once
|
||||
delay = SiteSetting.editing_grace_period + 1
|
||||
|
||||
expect_enqueued_with(job: :update_hotlinked_raw, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do
|
||||
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes downloaded images when they are no longer needed' do
|
||||
post = Fabricate(:post, raw: "<img src='#{image_url}'>")
|
||||
stub_image_size
|
||||
|
@ -196,14 +211,14 @@ describe Jobs::PullHotlinkedImages do
|
|||
expect(post.uploads).to contain_exactly(upload)
|
||||
end
|
||||
|
||||
it "skips raw_html posts" do
|
||||
it "skips editing raw for raw_html posts" do
|
||||
raw = "<img src=\"#{image_url}\">"
|
||||
post = Fabricate(:post, raw: raw, cook_method: Post.cook_methods[:raw_html])
|
||||
stub_image_size
|
||||
expect do
|
||||
post.rebake!
|
||||
post.reload
|
||||
end.not_to change { Upload.count }
|
||||
end.to change { Upload.count }.by(1)
|
||||
|
||||
expect(post.raw).to eq(raw)
|
||||
end
|
||||
|
@ -556,6 +571,43 @@ describe Jobs::PullHotlinkedImages do
|
|||
end
|
||||
end
|
||||
|
||||
context "#disable_if_low_on_disk_space" do
|
||||
fab!(:post) { Fabricate(:post, created_at: 20.days.ago) }
|
||||
let(:job) { Jobs::PullHotlinkedImages.new }
|
||||
|
||||
before do
|
||||
SiteSetting.download_remote_images_to_local = true
|
||||
SiteSetting.download_remote_images_threshold = 20
|
||||
job.stubs(:available_disk_space).returns(50)
|
||||
end
|
||||
|
||||
it "does nothing when there's enough disk space" do
|
||||
SiteSetting.expects(:download_remote_images_to_local=).never
|
||||
job.execute({ post_id: post.id })
|
||||
end
|
||||
|
||||
context "when there's not enough disk space" do
|
||||
|
||||
before { SiteSetting.download_remote_images_threshold = 75 }
|
||||
|
||||
it "disables download_remote_images_threshold and send a notification to the admin" do
|
||||
StaffActionLogger.any_instance.expects(:log_site_setting_change).once
|
||||
SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once
|
||||
job.execute({ post_id: post.id })
|
||||
|
||||
expect(SiteSetting.download_remote_images_to_local).to eq(false)
|
||||
end
|
||||
|
||||
it "doesn't disable download_remote_images_to_local if site uses S3" do
|
||||
setup_s3
|
||||
job.execute({ post_id: post.id })
|
||||
|
||||
expect(SiteSetting.download_remote_images_to_local).to eq(true)
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
def stub_s3(upload)
|
||||
stub_upload(upload)
|
||||
stub_request(:get, "https:" + upload.url).to_return(status: 200, body: file_from_fixtures("smallest.png"))
|
||||
|
|
|
@ -21,7 +21,6 @@ describe CookedPostProcessor do
|
|||
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
|
||||
cpp.expects(:post_process_images).in_sequence(post_process)
|
||||
cpp.expects(:optimize_urls).in_sequence(post_process)
|
||||
cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
|
||||
cpp.post_process
|
||||
|
||||
expect(PostUpload.exists?(post: post, upload: upload)).to eq(true)
|
||||
|
@ -1543,102 +1542,6 @@ describe CookedPostProcessor do
|
|||
end
|
||||
end
|
||||
|
||||
context "#pull_hotlinked_images" do
|
||||
|
||||
let(:post) { build(:post, created_at: 20.days.ago) }
|
||||
let(:cpp) { CookedPostProcessor.new(post) }
|
||||
|
||||
before { cpp.stubs(:available_disk_space).returns(90) }
|
||||
|
||||
it "runs even when download_remote_images_to_local is disabled" do
|
||||
# We want to run it to pull hotlinked optimized images
|
||||
SiteSetting.download_remote_images_to_local = false
|
||||
expect { cpp.pull_hotlinked_images }.
|
||||
to change { Jobs::PullHotlinkedImages.jobs.count }.by 1
|
||||
end
|
||||
|
||||
context "when download_remote_images_to_local? is enabled" do
|
||||
before do
|
||||
SiteSetting.download_remote_images_to_local = true
|
||||
end
|
||||
|
||||
it "disables download_remote_images if there is not enough disk space" do
|
||||
cpp.expects(:available_disk_space).returns(5)
|
||||
cpp.pull_hotlinked_images
|
||||
expect(SiteSetting.download_remote_images_to_local).to eq(false)
|
||||
end
|
||||
|
||||
it "does not run when requested to skip" do
|
||||
CookedPostProcessor.new(post, skip_pull_hotlinked_images: true).pull_hotlinked_images
|
||||
expect(Jobs::PullHotlinkedImages.jobs.size).to eq(0)
|
||||
end
|
||||
|
||||
context "and there is enough disk space" do
|
||||
before { cpp.expects(:disable_if_low_on_disk_space).at_least_once }
|
||||
|
||||
context "and the post has been updated by an actual user" do
|
||||
|
||||
before { post.id = 42 }
|
||||
|
||||
it "ensures only one job is scheduled right after the editing_grace_period" do
|
||||
freeze_time
|
||||
|
||||
Jobs.expects(:cancel_scheduled_job).with(:pull_hotlinked_images, post_id: post.id).once
|
||||
|
||||
delay = SiteSetting.editing_grace_period + 1
|
||||
|
||||
expect_enqueued_with(job: :pull_hotlinked_images, args: { post_id: post.id }, at: Time.zone.now + delay.seconds) do
|
||||
cpp.pull_hotlinked_images
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "#disable_if_low_on_disk_space" do
|
||||
|
||||
let(:post) { build(:post, created_at: 20.days.ago) }
|
||||
let(:cpp) { CookedPostProcessor.new(post) }
|
||||
|
||||
before do
|
||||
SiteSetting.download_remote_images_to_local = true
|
||||
SiteSetting.download_remote_images_threshold = 20
|
||||
cpp.stubs(:available_disk_space).returns(50)
|
||||
end
|
||||
|
||||
it "does nothing when there's enough disk space" do
|
||||
SiteSetting.expects(:download_remote_images_to_local=).never
|
||||
cpp.disable_if_low_on_disk_space
|
||||
end
|
||||
|
||||
context "when there's not enough disk space" do
|
||||
|
||||
before { SiteSetting.download_remote_images_threshold = 75 }
|
||||
|
||||
it "disables download_remote_images_threshold and send a notification to the admin" do
|
||||
StaffActionLogger.any_instance.expects(:log_site_setting_change).once
|
||||
SystemMessage.expects(:create_from_system_user).with(Discourse.site_contact_user, :download_remote_images_disabled).once
|
||||
cpp.disable_if_low_on_disk_space
|
||||
|
||||
expect(SiteSetting.download_remote_images_to_local).to eq(false)
|
||||
end
|
||||
|
||||
it "doesn't disable download_remote_images_to_local if site uses S3" do
|
||||
setup_s3
|
||||
cpp.disable_if_low_on_disk_space
|
||||
|
||||
expect(SiteSetting.download_remote_images_to_local).to eq(true)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "#is_a_hyperlink?" do
|
||||
|
||||
let(:post) { build(:post) }
|
||||
|
|
Loading…
Reference in New Issue