FEATURE: ensure consistency of post uploads in cooked content

This commit is contained in:
Vinoth Kannan 2019-04-04 02:23:28 +05:30
parent 14952a9cbc
commit b3fb0a7039
4 changed files with 119 additions and 0 deletions

View File

@ -0,0 +1,48 @@
# frozen_string_literal: true
module Jobs
  # Hourly job that inspects the cooked HTML of posts for link/image URLs
  # containing "/uploads/" and stores, in a post custom field, every such URL
  # for which neither Upload.get_from_url nor OptimizedImage.get_from_url
  # returns a record.
  class EnsurePostUploadsExistence < Jobs::Scheduled
    every 1.hour

    # Name of the post custom field that holds the missing upload URLs.
    MISSING_UPLOADS ||= "missing_uploads"

    # Deletes markers older than one month, then checks every post that has an
    # <a> or <img> tag in its cooked HTML and no marker yet.
    #
    # @param args [Hash] job arguments (not used by this job)
    def execute(args)
      # Expiring old markers makes the affected posts eligible for a re-check
      # below, because the query only selects posts without a marker row.
      PostCustomField
        .where(name: MISSING_UPLOADS)
        .where("created_at < ?", 1.month.ago)
        .destroy_all

      # The literal 'missing_uploads' in the join must stay in sync with
      # MISSING_UPLOADS.
      Post
        .joins("LEFT JOIN post_custom_fields cf ON posts.id = cf.post_id AND cf.name = 'missing_uploads'")
        .where("(posts.cooked LIKE '%<a %' OR posts.cooked LIKE '%<img %') AND cf.id IS NULL")
        .find_in_batches(batch_size: 100) do |posts|
          Post.preload_custom_fields(posts, [MISSING_UPLOADS])
          posts.each { |post| record_missing_uploads(post) }
        end
    end

    private

    # Persists the missing upload URLs of +post+, or removes a stale marker
    # when nothing is missing any more.
    def record_missing_uploads(post)
      missing = missing_upload_urls(post)

      if missing.present?
        # Drop the preloaded values before writing the field.
        # NOTE(review): presumably required so custom_fields is not served
        # from the preload cache -- confirm against HasCustomFields.
        post.preloaded_custom_fields = nil
        post.custom_fields[MISSING_UPLOADS] = missing
        post.save_custom_fields
      elsif post.custom_fields[MISSING_UPLOADS].present?
        # A relation delete is nil-safe (find_by may return nil if the row
        # vanished in the meantime) and removes every matching row.
        # NOTE(review): an array value is presumably persisted as several
        # rows, of which find_by would only delete one -- confirm.
        PostCustomField.where(post_id: post.id, name: MISSING_UPLOADS).destroy_all
      end
    end

    # Collects the href/src values in the cooked HTML of +post+ that look like
    # uploads but cannot be resolved to a record.
    #
    # @return [Array<String>] unresolved URLs, in document order
    def missing_upload_urls(post)
      missing = []

      Nokogiri::HTML::fragment(post.cooked).css("a/@href", "img/@src").each do |media|
        src = media.value
        next if src.blank? || !src.include?("/uploads/")

        # Give protocol-relative URLs an explicit scheme.
        src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")

        # Only consider URLs the store claims as its own, or paths starting
        # with a single slash.
        next unless Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i

        missing << src unless Upload.get_from_url(src) || OptimizedImage.get_from_url(src)
      end

      missing
    end
  end
end

View File

@ -36,6 +36,30 @@ module Jobs
optimized_image.destroy!
upload.rebake_posts_on_old_scheme
end
# For every post whose cooked HTML contains an <img> tag, classify each missing
# upload path by where its file can still be found on disk and record the
# result as a post custom field ("pu_found", "pu_tombstone" or "pu_missing").
Post.where("cooked LIKE '%<img %'").find_each do |post|
  missing = post.find_missing_uploads
  next if missing.blank?

  missing.each do |src|
    # Strip the CDN prefix so that only the path part remains.
    src.sub!("https://discourse-cdn-sjc1.com/mcneel", "")
    # Only paths that split into exactly five "/"-separated parts are handled.
    next unless src.split("/").length == 5

    # File.exist? replaces the deprecated File.exists? (removed in Ruby 3.2).
    status =
      if File.exist?("#{Discourse.store.public_dir}#{src}")
        "pu_found"
      elsif File.exist?("#{Discourse.store.tombstone_dir}#{src}")
        "pu_tombstone"
      else
        "pu_missing"
      end

    # NOTE(review): PostCustomField is addressed through `name` wherever it is
    # queried, so `name:` is used here instead of `key:` -- confirm that the
    # model defines no `key` attribute.
    PostCustomField.create!(post_id: post.id, value: src, name: status)
  end
end
end
end

View File

@ -350,6 +350,25 @@ class OptimizedImage < ActiveRecord::Base
false
end
end
# Finds the optimized-image portion of a URL or path.
#
# @param url [String] URL or path to inspect
# @return [MatchData, nil] nil when +url+ has no "/optimized/<digit>X/..."
#   part; otherwise capture 1 is the matched path starting at "/optimized/"
#   and capture 2 is the leading alphanumeric run of its last path segment
def self.extract_optimized_url(url)
  optimized_path = /(\/optimized\/\dX[\/\.\w]*\/([a-zA-Z0-9]+)[\.\w]*)/
  url.match(optimized_path)
end
# Looks up the OptimizedImage whose stored url ends with the optimized path
# found in +url+.
#
# @param url [String, nil] absolute or relative URL, possibly percent-encoded
# @return [OptimizedImage, nil] nil when +url+ is blank, cannot be parsed, has
#   no path, contains no optimized path, or no record matches
def self.get_from_url(url)
  return if url.blank?

  uri =
    begin
      # URI.unescape is obsolete and was removed in Ruby 3.0;
      # URI::DEFAULT_PARSER.unescape performs the same decoding.
      URI(URI::DEFAULT_PARSER.unescape(url))
    rescue URI::Error
      # An unparsable URL is treated as "no match".
      nil
    end
  return if uri&.path.blank?

  data = extract_optimized_url(uri.path)
  return if data.blank?

  # data[1] is the path from "/optimized/" onward; match it as a suffix.
  OptimizedImage.find_by("url LIKE ?", "%#{data[1]}")
end
end
# == Schema Information

View File

@ -0,0 +1,28 @@
require 'rails_helper'
# Specs for the scheduled job that flags posts referencing uploads that have
# no backing record.
describe Jobs::EnsurePostUploadsExistence do
  # `execute` is an instance method, hence the '#' prefix.
  context '#execute' do
    let(:upload) { Fabricate(:upload) }
    let(:optimized) { Fabricate(:optimized_image, url: '/uploads/default/optimized/1X/d1c2d40ab994e8410c_100x200.png') }

    it 'should create post custom field for missing upload' do
      post = Fabricate(:post, cooked: "A sample post <img src='#{upload.url}'>")
      upload.destroy!

      described_class.new.execute({})

      # Look the field up by post and name instead of relying on insertion
      # order via PostCustomField.last.
      field = PostCustomField.find_by(post_id: post.id, name: described_class::MISSING_UPLOADS)
      expect(field).to be_present
      expect(field.value).to eq(upload.url)
    end

    it 'should not create post custom fields' do
      # Both referenced records exist, so nothing should be flagged.
      Fabricate(:post, cooked: "A sample post <a href='#{upload.url}'> <img src='#{optimized.url}'>")

      expect {
        described_class.new.execute({})
      }.not_to change {
        PostCustomField.count
      }
    end
  end
end