DEV: convert scheduled job EnsurePostUploadsExistence into a rake task

This commit is contained in:
Vinoth Kannan 2019-04-09 02:07:35 +05:30
parent cc53f902c6
commit 914ada1c74
8 changed files with 69 additions and 137 deletions

View File

@ -1,47 +0,0 @@
# frozen_string_literal: true
module Jobs
  # Scheduled job (runs every hour) that scans cooked post HTML for links and
  # images pointing at uploads and records, as post custom fields, the ones
  # for which neither an Upload nor an OptimizedImage record can be found.
  class EnsurePostUploadsExistence < Jobs::Scheduled
    every 1.hour

    # Name of the post custom field that stores the scan result.
    MISSING_UPLOADS ||= "missing_uploads"

    # Does nothing unless the enable_missing_post_uploads_check site setting
    # is on. Otherwise drops results older than one month and scans every
    # post that has an <a> or <img> tag and no result field yet.
    def execute(args)
      return unless SiteSetting.enable_missing_post_uploads_check?

      expired_results = PostCustomField
        .where(name: MISSING_UPLOADS)
        .where("created_at < ?", 1.month.ago)
      expired_results.destroy_all

      unscanned_posts = Post
        .joins("LEFT JOIN post_custom_fields cf ON posts.id = cf.post_id AND cf.name = '#{MISSING_UPLOADS}'")
        .where("(posts.cooked LIKE '%<a %' OR posts.cooked LIKE '%<img %') AND cf.id IS NULL")

      unscanned_posts.find_in_batches(batch_size: 100) do |batch|
        Post.preload_custom_fields(batch, [MISSING_UPLOADS])
        batch.each { |post| record_scan_result(post) }
      end
    end

    private

    # Stores one custom field per missing URL, or a single nil-valued field
    # when nothing is missing; either way the post then has a field and no
    # longer satisfies the "cf.id IS NULL" filter used by #execute.
    def record_scan_result(post)
      missing = missing_upload_urls(post)
      values = missing.present? ? missing : [nil]
      values.each do |value|
        PostCustomField.create!(post_id: post.id, name: MISSING_UPLOADS, value: value)
      end
    end

    # Returns the href/src values in the post's cooked HTML that look like
    # uploads but match neither an Upload nor an OptimizedImage record.
    def missing_upload_urls(post)
      html = Nokogiri::HTML::fragment(post.cooked)

      html.css("a/@href", "img/@src").each_with_object([]) do |attribute, missing|
        src = attribute.value
        next if src.blank? || (src =~ /\/uploads\//).blank?

        # Protocol-relative URLs get an explicit scheme before the store check.
        if src.start_with?("//")
          src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}"
        end

        next unless Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i
        next if Upload.get_from_url(src) || OptimizedImage.get_from_url(src)

        missing << src
      end
    end
  end
end

View File

@ -0,0 +1,24 @@
# Shared URL-lookup behaviour for models that store a +url+ column and define
# a +URL_REGEX+ constant whose first capture group is the path portion used
# for the lookup.
module HasUrl
  extend ActiveSupport::Concern

  class_methods do
    # Matches +url+ against the including class's URL_REGEX.
    #
    # @param url [String]
    # @return [MatchData, nil] nil when the pattern does not match
    def extract_url(url)
      url.match(self::URL_REGEX)
    end

    # Finds the record whose stored url ends with the path captured from +url+.
    #
    # @param url [String, nil]
    # @return [ActiveRecord::Base, nil] nil when +url+ is blank, cannot be
    #   parsed, has no path, does not match URL_REGEX, or no record matches
    def get_from_url(url)
      return if url.blank?

      uri = begin
        # URI.unescape is obsolete (removed in Ruby 3.0); the default parser
        # exposes the same unescaping.
        URI(URI::DEFAULT_PARSER.unescape(url))
      rescue URI::Error
        nil
      end
      return if uri&.path.blank?

      data = extract_url(uri.path)
      return if data.blank?

      # The captured path may contain "_" (matched by \w), which LIKE treats
      # as a single-character wildcard; escape it so only the literal suffix
      # matches.
      # NOTE(review): lookup is by URL suffix only; capture group 2 of
      # URL_REGEX is not used here.
      find_by("url LIKE ?", "%#{sanitize_sql_like(data[1])}")
    end
  end
end

View File

@ -4,10 +4,12 @@ require_dependency "db_helper"
require_dependency "file_store/local_store"
class OptimizedImage < ActiveRecord::Base
include HasUrl
belongs_to :upload
# BUMP UP if optimized image algorithm changes
VERSION = 2
URL_REGEX ||= /(\/optimized\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
def self.lock(upload_id, width, height)
@hostname ||= `hostname`.strip rescue "unknown"
@ -350,25 +352,6 @@ class OptimizedImage < ActiveRecord::Base
false
end
end
# Matches the "/optimized/<digit>X/..." portion of +url+.
# Returns the MatchData (group 1: the matched path, group 2: the leading
# alphanumeric run of the file name) or nil when there is no match.
def self.extract_optimized_url(url)
  optimized_path_pattern = /(\/optimized\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
  url.match(optimized_path_pattern)
end
# Looks up the OptimizedImage whose stored url ends with the "/optimized/..."
# path found in +url+. Returns nil when +url+ is blank, cannot be parsed,
# has no path, or contains no optimized path.
def self.get_from_url(url)
  return if url.blank?

  parsed = begin
    URI(URI.unescape(url))
  rescue URI::Error
    nil
  end

  path = parsed&.path
  return if path.blank?

  match = extract_optimized_url(path)
  return if match.blank?

  url_suffix = match[1]
  OptimizedImage.find_by("url LIKE ?", "%#{url_suffix}")
end
end
# == Schema Information

View File

@ -8,9 +8,11 @@ require_dependency "base62"
class Upload < ActiveRecord::Base
include ActionView::Helpers::NumberHelper
include HasUrl
SHA1_LENGTH = 40
SEEDED_ID_THRESHOLD = 0
URL_REGEX ||= /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
belongs_to :user
@ -192,27 +194,6 @@ class Upload < ActiveRecord::Base
Digest::SHA1.file(path).hexdigest
end
# Matches the "/original/<digit>X/..." portion of +url+.
# Returns the MatchData (group 1: the matched path, group 2: the leading
# alphanumeric run of the file name) or nil when there is no match.
def self.extract_upload_url(url)
  original_path_pattern = /(\/original\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
  url.match(original_path_pattern)
end
# Looks up the Upload referenced by +url+.
#
# When the file-name capture has exactly SHA1_LENGTH characters it is tried
# as a sha1 first; otherwise, or when that finds nothing, the record whose
# stored url ends with the captured "/original/..." path is returned.
# Returns nil when +url+ is blank, cannot be parsed, has no path, or
# contains no original path.
def self.get_from_url(url)
  return if url.blank?

  parsed = begin
    URI(URI.unescape(url))
  rescue URI::Error
    nil
  end

  path = parsed&.path
  return if path.blank?

  match = extract_upload_url(path)
  return if match.blank?

  candidate_sha1 = match[2]
  by_sha1 =
    if candidate_sha1&.length == SHA1_LENGTH
      Upload.find_by(sha1: candidate_sha1)
    end

  by_sha1 || Upload.find_by("url LIKE ?", "%#{match[1]}")
end
# Formats this record's +filesize+ with number_to_human_size.
def human_filesize
  raw_size = filesize
  number_to_human_size(raw_size)
end

View File

@ -1481,9 +1481,6 @@ developer:
enable_safe_mode:
default: true
client: true
enable_missing_post_uploads_check:
default: false
hidden: true
embedding:
feed_polling_enabled:

View File

@ -387,3 +387,28 @@ task 'posts:reorder_posts', [:topic_id] => [:environment] do |_, args|
puts "", "Done.", ""
end
desc 'Finds missing post upload records from cooked HTML content'
task 'posts:missing_uploads' => :environment do
  name = "missing_uploads"

  # Clear results of earlier runs so only the current state is recorded.
  PostCustomField.where(name: name).destroy_all

  posts = Post.where("posts.cooked LIKE '%<a %' OR posts.cooked LIKE '%<img %'").select(:id, :cooked)
  missing_count = 0

  posts.find_each do |post|
    # Collected per post: a list shared across iterations would attach the
    # URLs found in earlier posts to every later post as well.
    missing = []

    Nokogiri::HTML::fragment(post.cooked).css("a/@href", "img/@src").each do |media|
      src = media.value
      next if src.blank? || (src =~ /\/uploads\//).blank?

      # Protocol-relative URLs get an explicit scheme before the store check.
      src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
      next unless Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i

      missing << src unless Upload.get_from_url(src) || OptimizedImage.get_from_url(src)
    end

    missing.each { |src| PostCustomField.create!(post_id: post.id, name: name, value: src) }
    missing_count += missing.size

    # One progress dot per scanned post.
    putc "."
  end

  puts "", "#{missing_count} post uploads are missing.", ""
end

View File

@ -1,47 +0,0 @@
require 'rails_helper'
describe Jobs::EnsurePostUploadsExistence do
  context '.execute' do
    let(:upload) { Fabricate(:upload) }
    let(:optimized) { Fabricate(:optimized_image, url: '/uploads/default/optimized/1X/d1c2d40ab994e8410c_100x200.png') }

    # Evaluated lazily, i.e. on first use after the job has run in each example.
    let(:result_field) { PostCustomField.find_by(name: Jobs::EnsurePostUploadsExistence::MISSING_UPLOADS) }

    def run_job
      described_class.new.execute({})
    end

    context "when enabled" do
      before { SiteSetting.enable_missing_post_uploads_check = true }

      it 'should create post custom field for missing upload' do
        Fabricate(:post, cooked: "A sample post <img src='#{upload.url}'>")
        upload.destroy!

        run_job

        expect(result_field).to be_present
        expect(result_field.value).to eq(upload.url)
      end

      it 'should create post custom field with nil value' do
        # Both referenced records still exist, so nothing is missing.
        Fabricate(:post, cooked: "A sample post <a href='#{upload.url}'> <img src='#{optimized.url}'>")

        run_job

        expect(result_field).to be_present
        expect(result_field.value).to eq(nil)
      end
    end

    context "when disabled" do
      before { SiteSetting.enable_missing_post_uploads_check = false }

      it "does not execute" do
        Fabricate(:post, cooked: "A sample post <img src='#{upload.url}'>")
        upload.destroy!

        run_job

        expect(result_field).to be_blank
      end
    end
  end
end

View File

@ -54,4 +54,20 @@ RSpec.describe "Post rake tasks" do
expect(post.reload.cooked).to eq('<p>The quick brown fox jumps over the lazy dog</p>')
end
end
describe 'missing_uploads' do
  let(:upload) { Fabricate(:upload) }
  let(:optimized) { Fabricate(:optimized_image, url: '/uploads/default/optimized/1X/d1c2d40ab994e8410c_100x200.png') }

  it 'should create post custom field for missing upload' do
    # The post keeps pointing at the upload's URL after the record is destroyed.
    Fabricate(:post, cooked: "A sample post <img src='#{upload.url}'>")
    upload.destroy!

    Rake::Task['posts:missing_uploads'].invoke

    recorded = PostCustomField.find_by(name: "missing_uploads")
    expect(recorded).to be_present
    expect(recorded.value).to eq(upload.url)
  end
end
end