FIX: properly handle too large & broken images in posts
This commit is contained in:
parent
6f2a3cb026
commit
678e28794a
|
@ -5,11 +5,8 @@ require_dependency 'upload_creator'
|
|||
module Jobs
|
||||
|
||||
class PullHotlinkedImages < Jobs::Base
|
||||
|
||||
sidekiq_options queue: 'low'
|
||||
|
||||
LARGE_IMAGES = "large_images".freeze
|
||||
|
||||
def initialize
|
||||
@max_size = SiteSetting.max_image_size_kb.kilobytes
|
||||
end
|
||||
|
@ -47,26 +44,25 @@ module Jobs
|
|||
|
||||
raw = post.raw.dup
|
||||
start_raw = raw.dup
|
||||
|
||||
downloaded_urls = {}
|
||||
large_images = post.custom_fields[LARGE_IMAGES].presence || []
|
||||
|
||||
# recover from bad custom field silently
|
||||
unless Array === large_images
|
||||
large_images = []
|
||||
end
|
||||
large_images = JSON.parse(post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
|
||||
broken_images = JSON.parse(post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
|
||||
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
|
||||
|
||||
broken_images, new_large_images = [], []
|
||||
has_new_large_image = false
|
||||
has_new_broken_image = false
|
||||
has_downloaded_image = false
|
||||
|
||||
extract_images_from(post.cooked).each do |image|
|
||||
src = original_src = image['src']
|
||||
if src.start_with?("//")
|
||||
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}"
|
||||
end
|
||||
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
|
||||
|
||||
if is_valid_image_url(src)
|
||||
begin
|
||||
# have we already downloaded that file?
|
||||
unless downloaded_urls.include?(src) || large_images.include?(src) || broken_images.include?(src)
|
||||
unless downloaded_images.include?(src) || large_images.include?(src) || broken_images.include?(src)
|
||||
if hotlinked = download(src)
|
||||
if File.size(hotlinked.path) <= @max_size
|
||||
filename = File.basename(URI.parse(src).path)
|
||||
|
@ -74,15 +70,18 @@ module Jobs
|
|||
upload = UploadCreator.new(hotlinked, filename, origin: src).create_for(post.user_id)
|
||||
if upload.persisted?
|
||||
downloaded_urls[src] = upload.url
|
||||
downloaded_images[src.sub(/^https?:/i, "")] = upload.id
|
||||
has_downloaded_image = true
|
||||
else
|
||||
log(:info, "Failed to pull hotlinked image for post: #{post_id}: #{src} - #{upload.errors.full_messages.join("\n")}")
|
||||
end
|
||||
else
|
||||
large_images << original_src
|
||||
new_large_images << original_src
|
||||
large_images << original_src.sub(/^https?:/i, "")
|
||||
has_new_large_image = true
|
||||
end
|
||||
else
|
||||
broken_images << original_src
|
||||
broken_images << original_src.sub(/^https?:/i, "")
|
||||
has_new_broken_image = true
|
||||
end
|
||||
end
|
||||
# have we successfully downloaded that file?
|
||||
|
@ -111,42 +110,24 @@ module Jobs
|
|||
log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n"))
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if new_large_images.length > 0
|
||||
post.custom_fields[LARGE_IMAGES] = large_images
|
||||
post.save_custom_fields
|
||||
end
|
||||
large_images.uniq!
|
||||
broken_images.uniq!
|
||||
|
||||
post.custom_fields[Post::LARGE_IMAGES] = large_images.to_json if large_images.present?
|
||||
post.custom_fields[Post::BROKEN_IMAGES] = broken_images.to_json if broken_images.present?
|
||||
post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
|
||||
# only save custom fields if there are any
|
||||
post.save_custom_fields if large_images.present? || broken_images.present? || downloaded_images.present?
|
||||
|
||||
post.reload
|
||||
|
||||
if start_raw == post.raw && raw != post.raw
|
||||
changes = { raw: raw, edit_reason: I18n.t("upload.edit_reason") }
|
||||
# we never want that job to bump the topic
|
||||
options = { bypass_bump: true }
|
||||
post.revise(Discourse.system_user, changes, options)
|
||||
elsif downloaded_urls.present? || new_large_images.present?
|
||||
post.revise(Discourse.system_user, changes, bypass_bump: true)
|
||||
elsif has_downloaded_image || has_new_large_image || has_new_broken_image
|
||||
post.trigger_post_process(true)
|
||||
elsif broken_images.present?
|
||||
start_html = post.cooked
|
||||
doc = Nokogiri::HTML::fragment(start_html)
|
||||
images = doc.css("img[src]") - doc.css("img.avatar")
|
||||
images.each do |tag|
|
||||
src = tag['src']
|
||||
if broken_images.include?(src)
|
||||
tag.name = 'span'
|
||||
tag.set_attribute('class', 'broken-image fa fa-chain-broken')
|
||||
tag.set_attribute('title', I18n.t('post.image_placeholder.broken'))
|
||||
tag.remove_attribute('src')
|
||||
tag.remove_attribute('width')
|
||||
tag.remove_attribute('height')
|
||||
end
|
||||
end
|
||||
if start_html == post.cooked && doc.to_html != post.cooked
|
||||
post.update_column(:cooked, doc.to_html)
|
||||
post.publish_change_to_clients! :revised
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -58,7 +58,11 @@ class Post < ActiveRecord::Base
|
|||
# We can pass several creating options to a post via attributes
|
||||
attr_accessor :image_sizes, :quoted_post_numbers, :no_bump, :invalidate_oneboxes, :cooking_options, :skip_unique_check
|
||||
|
||||
SHORT_POST_CHARS = 1200
|
||||
LARGE_IMAGES ||= "large_images".freeze
|
||||
BROKEN_IMAGES ||= "broken_images".freeze
|
||||
DOWNLOADED_IMAGES ||= "downloaded_images".freeze
|
||||
|
||||
SHORT_POST_CHARS ||= 1200
|
||||
|
||||
scope :private_posts_for_user, ->(user) {
|
||||
where("posts.topic_id IN (SELECT topic_id
|
||||
|
|
|
@ -31,9 +31,9 @@ class CookedPostProcessor
|
|||
def post_process(bypass_bump = false)
|
||||
DistributedMutex.synchronize("post_process_#{@post.id}") do
|
||||
DiscourseEvent.trigger(:before_post_process_cooked, @doc, @post)
|
||||
keep_reverse_index_up_to_date
|
||||
post_process_images
|
||||
post_process_oneboxes
|
||||
post_process_images
|
||||
keep_reverse_index_up_to_date
|
||||
optimize_urls
|
||||
update_post_image
|
||||
enforce_nofollow
|
||||
|
@ -65,26 +65,28 @@ class CookedPostProcessor
|
|||
end
|
||||
end
|
||||
|
||||
upload_ids |= oneboxed_image_uploads.pluck(:id)
|
||||
upload_ids |= downloaded_images.values.select { |id| Upload.exists?(id) }
|
||||
|
||||
values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",")
|
||||
PostUpload.transaction do
|
||||
PostUpload.where(post_id: @post.id).delete_all
|
||||
if upload_ids.length > 0
|
||||
if upload_ids.size > 0
|
||||
PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def post_process_images
|
||||
images = extract_images
|
||||
return if images.blank?
|
||||
|
||||
images.each do |img|
|
||||
next if large_images.include?(img["src"]) && add_large_image_placeholder!(img)
|
||||
|
||||
limit_size!(img)
|
||||
convert_to_link!(img)
|
||||
extract_images.each do |img|
|
||||
src = img["src"].sub(/^https?:/i, "")
|
||||
if large_images.include?(src)
|
||||
add_large_image_placeholder!(img)
|
||||
elsif broken_images.include?(src)
|
||||
add_broken_image_placeholder!(img)
|
||||
else
|
||||
limit_size!(img)
|
||||
convert_to_link!(img)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -125,32 +127,48 @@ class CookedPostProcessor
|
|||
end
|
||||
|
||||
img.remove
|
||||
true
|
||||
end
|
||||
|
||||
def add_broken_image_placeholder!(img)
|
||||
img.name = "span"
|
||||
img.set_attribute("class", "broken-image fa fa-chain-broken")
|
||||
img.set_attribute("title", I18n.t("post.image_placeholder.broken"))
|
||||
img.remove_attribute("src")
|
||||
img.remove_attribute("width")
|
||||
img.remove_attribute("height")
|
||||
end
|
||||
|
||||
def large_images
|
||||
@large_images ||= @post.custom_fields[Jobs::PullHotlinkedImages::LARGE_IMAGES].presence || []
|
||||
@large_images ||= JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue []
|
||||
end
|
||||
|
||||
def broken_images
|
||||
@broken_images ||= JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue []
|
||||
end
|
||||
|
||||
def downloaded_images
|
||||
@downloaded_images ||= JSON.parse(@post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {}
|
||||
end
|
||||
|
||||
def extract_images
|
||||
# all image with a src attribute
|
||||
# all images with a src attribute
|
||||
@doc.css("img[src]") -
|
||||
# minus, data images
|
||||
# minus data images
|
||||
@doc.css("img[src^='data']") -
|
||||
# minus, emojis
|
||||
# minus emojis
|
||||
@doc.css("img.emoji") -
|
||||
# minus, image inside oneboxes
|
||||
# minus oneboxed images
|
||||
oneboxed_images -
|
||||
# minus, images inside quotes
|
||||
# minus images inside quotes
|
||||
@doc.css(".quote img")
|
||||
end
|
||||
|
||||
def extract_images_for_post
|
||||
# all image with a src attribute
|
||||
# all images with a src attribute
|
||||
@doc.css("img[src]") -
|
||||
# minus, emojis
|
||||
# minus emojis
|
||||
@doc.css("img.emoji") -
|
||||
# minus, images inside quotes
|
||||
# minus images inside quotes
|
||||
@doc.css(".quote img")
|
||||
end
|
||||
|
||||
|
@ -158,19 +176,6 @@ class CookedPostProcessor
|
|||
@doc.css(".onebox-body img, .onebox img")
|
||||
end
|
||||
|
||||
def oneboxed_image_uploads
|
||||
urls = Set.new
|
||||
|
||||
oneboxed_images.each do |img|
|
||||
url = img["src"].sub(/^https?:/i, "")
|
||||
urls << url
|
||||
urls << "http:#{url}"
|
||||
urls << "https:#{url}"
|
||||
end
|
||||
|
||||
Upload.where(origin: urls.to_a)
|
||||
end
|
||||
|
||||
def limit_size!(img)
|
||||
# retrieve the size from
|
||||
# 1) the width/height attributes
|
||||
|
@ -377,15 +382,16 @@ class CookedPostProcessor
|
|||
Oneboxer.onebox(url, args)
|
||||
end
|
||||
|
||||
uploads = oneboxed_image_uploads.select(:url, :origin)
|
||||
oneboxed_images.each do |img|
|
||||
if large_images.include?(img["src"])
|
||||
src = img["src"].sub(/^https?:/i, "")
|
||||
|
||||
if large_images.include?(src) || broken_images.include?(src)
|
||||
img.remove
|
||||
next
|
||||
end
|
||||
|
||||
url = img["src"].sub(/^https?:/i, "")
|
||||
upload = uploads.find { |u| u.origin.sub(/^https?:/i, "") == url }
|
||||
upload_id = downloaded_images[src]
|
||||
upload = Upload.find(upload_id) if upload_id
|
||||
img["src"] = upload.url if upload.present?
|
||||
|
||||
# make sure we grab dimensions for oneboxed images
|
||||
|
@ -462,7 +468,7 @@ class CookedPostProcessor
|
|||
# don't download remote images for posts that are more than n days old
|
||||
return unless @post.created_at > (Date.today - SiteSetting.download_remote_images_max_days_old)
|
||||
# we only want to run the job whenever it's changed by a user
|
||||
return if @post.last_editor_id == Discourse.system_user.id
|
||||
return if @post.last_editor_id && @post.last_editor_id <= 0
|
||||
# make sure no other job is scheduled
|
||||
Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
|
||||
# schedule the job
|
||||
|
|
|
@ -10,9 +10,9 @@ describe CookedPostProcessor do
|
|||
let(:post_process) { sequence("post_process") }
|
||||
|
||||
it "post process in sequence" do
|
||||
cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
|
||||
cpp.expects(:post_process_images).in_sequence(post_process)
|
||||
cpp.expects(:post_process_oneboxes).in_sequence(post_process)
|
||||
cpp.expects(:post_process_images).in_sequence(post_process)
|
||||
cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process)
|
||||
cpp.expects(:optimize_urls).in_sequence(post_process)
|
||||
cpp.expects(:pull_hotlinked_images).in_sequence(post_process)
|
||||
cpp.post_process
|
||||
|
|
|
@ -125,26 +125,6 @@ describe Jobs::PullHotlinkedImages do
|
|||
end
|
||||
end
|
||||
|
||||
describe 'replace' do
|
||||
it 'broken image with placeholder' do
|
||||
post = Fabricate(:post, raw: "<img src='#{broken_image_url}'>")
|
||||
|
||||
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
|
||||
post.reload
|
||||
|
||||
expect(post.cooked).to match(/<span class="broken-image fa fa-chain-broken/)
|
||||
end
|
||||
|
||||
it 'large image with placeholder' do
|
||||
post = Fabricate(:post, raw: "<img src='#{large_image_url}'>")
|
||||
|
||||
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
|
||||
post.reload
|
||||
|
||||
expect(post.cooked).to match(/<div class="large-image-placeholder"><a href=.*\ target="_blank" .*\>/)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#is_valid_image_url' do
|
||||
subject { described_class.new }
|
||||
|
||||
|
|
Loading…
Reference in New Issue