From 434035f167eb15f3e2d8c6a52577d66d0e140b71 Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Thu, 6 Sep 2018 09:58:01 +0800 Subject: [PATCH] FIX: Link post to uploads in `PostCreator`. * This ensures that uploads are linked to their post on creation instead of a background job which may be delayed if Sidekiq is facing difficulties. --- app/models/post.rb | 35 +++++++++- lib/cooked_post_processor.rb | 42 +++++------ lib/post_creator.rb | 12 +++- lib/post_jobs_enqueuer.rb | 4 +- script/import_scripts/lithium.rb | 2 +- spec/components/cooked_post_processor_spec.rb | 61 ++++------------ spec/components/post_creator_spec.rb | 23 ++++-- spec/models/post_spec.rb | 70 +++++++++++++++++++ 8 files changed, 165 insertions(+), 84 deletions(-) diff --git a/app/models/post.rb b/app/models/post.rb index 2a74e880c2b..293bed1a29a 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -536,7 +536,7 @@ class Post < ActiveRecord::Base QuotedPost.extract_from(self) # make sure we trigger the post process - trigger_post_process(true) + trigger_post_process(bypass_bump: true) publish_change_to_clients!(:rebaked) @@ -650,10 +650,11 @@ class Post < ActiveRecord::Base end # Enqueue post processing for this post - def trigger_post_process(bypass_bump = false) + def trigger_post_process(bypass_bump: false, skip_link_post_uploads: false) args = { post_id: id, - bypass_bump: bypass_bump + bypass_bump: bypass_bump, + skip_link_post_uploads: skip_link_post_uploads } args[:image_sizes] = image_sizes if image_sizes.present? args[:invalidate_oneboxes] = true if invalidate_oneboxes.present? @@ -784,6 +785,34 @@ class Post < ActiveRecord::Base locked_by_id.present? end + def link_post_uploads(fragments: nil) + upload_ids = [] + fragments ||= Nokogiri::HTML::fragment(self.cooked) + + fragments.css("a/@href", "img/@src").each do |media| + if upload = Upload.get_from_url(media.value) + upload_ids << upload.id + end + end + + upload_ids |= Upload.where(id: downloaded_images.values).pluck(:id) + values = upload_ids.map! { |upload_id| "(#{self.id},#{upload_id})" }.join(",") + + PostUpload.transaction do + PostUpload.where(post_id: self.id).delete_all + + if values.size > 0 + DB.exec("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}") + end + end + end + + def downloaded_images + JSON.parse(self.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") + rescue JSON::ParserError + {} + end + private def parse_quote_into_arguments(quote) diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 8daef42e89e..467a7fbc563 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -35,7 +35,11 @@ class CookedPostProcessor post_process_oneboxes post_process_images post_process_quotes - keep_reverse_index_up_to_date + + unless @opts[:skip_link_post_uploads] + @post.link_post_uploads(fragments: @doc) + end + optimize_urls update_post_image enforce_nofollow @@ -58,26 +62,6 @@ class CookedPostProcessor BadgeGranter.grant(Badge.find(Badge::FirstReplyByEmail), @post.user, post_id: @post.id) if @post.is_reply_by_email? end - def keep_reverse_index_up_to_date - upload_ids = [] - - @doc.css("a/@href", "img/@src").each do |media| - if upload = Upload.get_from_url(media.value) - upload_ids << upload.id - end - end - - upload_ids |= downloaded_images.values.select { |id| Upload.exists?(id) } - - values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",") - PostUpload.transaction do - PostUpload.where(post_id: @post.id).delete_all - if upload_ids.size > 0 - DB.exec("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}") - end - end - end - def post_process_images extract_images.each do |img| src = img["src"].sub(/^https?:/i, "") @@ -159,15 +143,25 @@ class CookedPostProcessor end def large_images - @large_images ||= JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]") rescue [] + @large_images ||= + begin + JSON.parse(@post.custom_fields[Post::LARGE_IMAGES].presence || "[]") + rescue JSON::ParserError + [] + end end def broken_images - @broken_images ||= JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") rescue [] + @broken_images ||= + begin + JSON.parse(@post.custom_fields[Post::BROKEN_IMAGES].presence || "[]") + rescue JSON::ParserError + [] + end end def downloaded_images - @downloaded_images ||= JSON.parse(@post.custom_fields[Post::DOWNLOADED_IMAGES].presence || "{}") rescue {} + @downloaded_images ||= @post.downloaded_images end def extract_images diff --git a/lib/post_creator.rb b/lib/post_creator.rb index 80f890ddba4..9cfe9c2fcc8 100644 --- a/lib/post_creator.rb +++ b/lib/post_creator.rb @@ -173,6 +173,7 @@ class PostCreator update_topic_auto_close update_user_counts create_embedded_topic + link_post_uploads ensure_in_allowed_users if guardian.is_staff? unarchive_message @@ -188,7 +189,7 @@ class PostCreator publish track_latest_on_category - enqueue_jobs unless @opts[:skip_jobs] + enqueue_jobs(skip_link_post_uploads: true) unless @opts[:skip_jobs] BadgeGranter.queue_badge_grant(Badge::Trigger::PostRevision, post: @post) trigger_after_events unless opts[:skip_events] @@ -213,12 +214,13 @@ class PostCreator @post end - def enqueue_jobs + def enqueue_jobs(skip_link_post_uploads: false) return unless @post && !@post.errors.present? PostJobsEnqueuer.new(@post, @topic, new_topic?, import_mode: @opts[:import_mode], - post_alert_options: @opts[:post_alert_options] + post_alert_options: @opts[:post_alert_options], + skip_link_post_uploads: skip_link_post_uploads ).enqueue_jobs end @@ -349,6 +351,10 @@ class PostCreator rollback_from_errors!(embed) unless embed.save end + def link_post_uploads + @post.link_post_uploads + end + def handle_spam if @spam GroupMessage.create(Group[:moderators].name, diff --git a/lib/post_jobs_enqueuer.rb b/lib/post_jobs_enqueuer.rb index 4dde3b73afa..a0abef96c0d 100644 --- a/lib/post_jobs_enqueuer.rb +++ b/lib/post_jobs_enqueuer.rb @@ -38,7 +38,9 @@ class PostJobsEnqueuer end def trigger_post_post_process - @post.trigger_post_process + @post.trigger_post_process( + skip_link_post_uploads: @opts[:skip_link_post_uploads] + ) end def after_post_create diff --git a/script/import_scripts/lithium.rb b/script/import_scripts/lithium.rb index 4d82412522c..442df14f8a4 100644 --- a/script/import_scripts/lithium.rb +++ b/script/import_scripts/lithium.rb @@ -886,7 +886,7 @@ SQL post.raw = new_raw post.cooked = post.cook(new_raw) cpp = CookedPostProcessor.new(post) - cpp.keep_reverse_index_up_to_date + cpp.link_post_uploads post.custom_fields["import_post_process"] = true post.save end diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 27ceeb6b63a..8bf669506f3 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -4,18 +4,29 @@ require "cooked_post_processor" describe CookedPostProcessor do context ".post_process" do + let(:upload) do + Fabricate(:upload, + url: '/uploads/default/original/1X/1/1234567890123456.jpg' + ) + end + + let(:post) do + Fabricate(:post, raw: <<~RAW) + + RAW + end - let(:post) { build(:post) } let(:cpp) { CookedPostProcessor.new(post) } let(:post_process) { sequence("post_process") } it "post process in sequence" do cpp.expects(:post_process_oneboxes).in_sequence(post_process) cpp.expects(:post_process_images).in_sequence(post_process) - cpp.expects(:keep_reverse_index_up_to_date).in_sequence(post_process) cpp.expects(:optimize_urls).in_sequence(post_process) cpp.expects(:pull_hotlinked_images).in_sequence(post_process) cpp.post_process + + expect(PostUpload.exists?(post: post, upload: upload)).to eq(true) end end @@ -40,52 +51,6 @@ describe CookedPostProcessor do end end - context ".keep_reverse_index_up_to_date" do - let(:video_upload) { Fabricate(:upload, url: '/uploads/default/original/1X/1/1234567890123456.mp4') } - let(:image_upload) { Fabricate(:upload, url: '/uploads/default/original/1X/1/1234567890123456.jpg') } - let(:audio_upload) { Fabricate(:upload, url: '/uploads/default/original/1X/1/1234567890123456.ogg') } - let(:attachment_upload) { Fabricate(:upload, url: '/uploads/default/original/1X/1/1234567890123456.csv') } - - let(:raw) do - <<~RAW - Link - - - - - - RAW - end - - let(:post) { Fabricate(:post, raw: raw) } - let(:cpp) { CookedPostProcessor.new(post) } - - it "finds all the uploads in the post" do - cpp.keep_reverse_index_up_to_date - - expect(PostUpload.where(post: post).map(&:upload_id).sort).to eq( - [video_upload.id, image_upload.id, audio_upload.id, attachment_upload.id].sort - ) - end - - it "cleans the reverse index up for the current post" do - cpp.keep_reverse_index_up_to_date - - post_uploads_ids = post.post_uploads.pluck(:id) - - cpp.keep_reverse_index_up_to_date - - expect(post.reload.post_uploads.pluck(:id)).to_not eq(post_uploads_ids) - end - - end - context ".post_process_images" do shared_examples "leave dimensions alone" do diff --git a/spec/components/post_creator_spec.rb b/spec/components/post_creator_spec.rb index 7499b6a276b..ecb30681dfc 100644 --- a/spec/components/post_creator_spec.rb +++ b/spec/components/post_creator_spec.rb @@ -186,11 +186,26 @@ describe PostCreator do end it 'queues up post processing job when saved' do - Jobs.expects(:enqueue).with(:feature_topic_users, has_key(:topic_id)) - Jobs.expects(:enqueue).with(:process_post, has_key(:post_id)) - Jobs.expects(:enqueue).with(:post_alert, has_key(:post_id)) - Jobs.expects(:enqueue).with(:notify_mailing_list_subscribers, has_key(:post_id)) creator.create + + post = Post.last + post_id = post.id + topic_id = post.topic_id + + process_post_args = Jobs::ProcessPost.jobs.first["args"].first + expect(process_post_args["skip_link_post_uploads"]).to eq(true) + expect(process_post_args["post_id"]).to eq(post_id) + + feature_topic_users_args = Jobs::FeatureTopicUsers.jobs.first["args"].first + expect(feature_topic_users_args["topic_id"]).to eq(topic_id) + + post_alert_args = Jobs::PostAlert.jobs.first["args"].first + expect(post_alert_args["post_id"]).to eq(post_id) + + notify_mailing_list_subscribers_args = + Jobs::NotifyMailingListSubscribers.jobs.first["args"].first + + expect(notify_mailing_list_subscribers_args["post_id"]).to eq(post_id) end it 'passes the invalidate_oneboxes along to the job if present' do diff --git a/spec/models/post_spec.rb b/spec/models/post_spec.rb index 1c43e0a00a3..0467843dd76 100644 --- a/spec/models/post_spec.rb +++ b/spec/models/post_spec.rb @@ -1153,4 +1153,74 @@ describe Post do expect(post.revisions.pluck(:number)).to eq([1, 2]) end + describe '#link_post_uploads' do + let(:video_upload) do + Fabricate(:upload, + url: '/uploads/default/original/1X/1/1234567890123456.mp4' + ) + end + + let(:image_upload) do + Fabricate(:upload, + url: '/uploads/default/original/1X/1/1234567890123456.jpg' + ) + end + + let(:audio_upload) do + Fabricate(:upload, + url: '/uploads/default/original/1X/1/1234567890123456.ogg' + ) + end + + let(:attachment_upload) do + Fabricate(:upload, + url: '/uploads/default/original/1X/1/1234567890123456.csv' + ) + end + + let(:raw) do + <<~RAW + Link + + + + + + RAW + end + + let(:post) { Fabricate(:post, raw: raw) } + + it "finds all the uploads in the post" do + post.custom_fields[Post::DOWNLOADED_IMAGES] = { + "/uploads/default/original/1X/1/1234567890123456.csv": attachment_upload.id + } + + post.save_custom_fields + post.link_post_uploads + + expect(PostUpload.where(post: post).pluck(:upload_id)).to contain_exactly( + video_upload.id, image_upload.id, audio_upload.id, attachment_upload.id + ) + end + + it "cleans the reverse index up for the current post" do + post.link_post_uploads + + post_uploads_ids = post.post_uploads.pluck(:id) + + post.link_post_uploads + + expect(post.reload.post_uploads.pluck(:id)).to_not contain_exactly( + post_uploads_ids + ) + end + end + end