task "images:compress" => :environment do io = ImageOptim.new images = Dir.glob("#{Rails.root}/app/**/*.png") image_sizes = Hash[*images.map{|i| [i,File.size(i)]}.to_a.flatten] io.optimize_images!(images) do |name, optimized| if optimized new_size = File.size(name) puts "#{name} => from: #{image_sizes[name.to_s]} to: #{new_size}" end end end desc "clean orphan uploaded files" task "images:clean_orphans" => :environment do RailsMultisite::ConnectionManagement.each_connection do |db| puts "Cleaning up #{db}" # ligthweight safety net to prevent users from wiping all their uploads out if PostUpload.count == 0 && Upload.count > 0 puts "The reverse index is empty. Make sure you run the `images:reindex` task" next end Upload.joins("LEFT OUTER JOIN post_uploads ON uploads.id = post_uploads.upload_id") .where("post_uploads.upload_id IS NULL") .find_each do |u| u.destroy putc "." end end puts "\ndone." end desc "download all hotlinked images" task "images:pull_hotlinked" => :environment do RailsMultisite::ConnectionManagement.each_connection do |db| # currently only works when using the local storage next if Discourse.store.external? puts "Pulling hotlinked images for: #{db}" # shorthand to the asset host asset_host = Rails.configuration.action_controller.asset_host # maximum size of the file in bytes max_size = SiteSetting.max_image_size_kb * 1024 # will hold the urls of the already downloaded images upload_urls = {} Post.find_each do |post| has_changed = false extract_images_from(post.cooked).each do |image| src = image['src'] if src.present? && src !~ /^\/[^\/]/ && !src.starts_with?(Discourse.base_url_no_prefix) && !(asset_host.present? && src.starts_with?(asset_host)) begin # have we already downloaded that file? if !upload_urls.include?(src) # initialize upload_urls[src] = nil # download the file hotlinked = download(src, max_size) # if the hotlinked image is OK if hotlinked.size <= max_size file = ActionDispatch::Http::UploadedFile.new(tempfile: hotlinked, filename: File.basename(URI.parse(src).path)) upload_urls[src] = Upload.create_for(post.user_id, file, hotlinked.size).url else puts "\nFailed to pull: #{src} for post ##{post.id} - too large\n" end end # if we have downloaded a file if upload_urls[src].present? src_for_regexp = src.gsub("?", "\\?").gsub(".", "\\.").gsub("+", "\\+") # there are 4 ways to insert an image in a post # HTML tag - post.raw.gsub!(/src=["']#{src_for_regexp}["']/i, "src='#{upload_urls[src]}'") # BBCode tag - [img]http://...[/img] post.raw.gsub!(/\[img\]#{src_for_regexp}\[\/img\]/i, "[img]#{upload_urls[src]}[/img]") # Markdown - ![alt](http://...) post.raw.gsub!(/!\[([^\]]*)\]\(#{src_for_regexp}\)/) { "![#{$1}](#{upload_urls[src]})" } # Direct link post.raw.gsub!(src, "") # mark the post as changed has_changed = true end rescue => e puts "\nFailed to pull: #{src} for post ##{post.id} - #{e}\n" ensure # close & delete the temporary file hotlinked && hotlinked.close! end end end if has_changed # since the raw has changed, we cook the post once again post.cooked = post.cook(post.raw, topic_id: post.topic_id, invalidate_oneboxes: true) # update both raw & cooked version of the post Post.exec_sql('update posts set cooked = ?, raw = ? where id = ?', post.cooked, post.raw, post.id) # trigger the post processing post.trigger_post_process putc "#" else putc "." end end end puts "\ndone." 
end

def extract_images_from(html)
  doc = Nokogiri::HTML::fragment(html)
  doc.css("img") - doc.css(".onebox-result img") - doc.css("img.avatar")
end

def download(url, max_size)
  # create a temporary file
  temp_file = Tempfile.new(["discourse-hotlinked", File.extname(URI.parse(url).path)])
  # download the hotlinked image
  File.open(temp_file.path, "wb") do |f|
    hotlinked = open(url, "rb", read_timeout: 5)
    while f.size <= max_size && (data = hotlinked.read(max_size))
      f.write(data)
    end
    hotlinked.close
  end
  temp_file
end
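
# --- Usage sketch (not part of the original tasks) ---
# A minimal sketch of how these tasks are typically invoked from the root of a
# Discourse checkout; the RAILS_ENV value is an assumption, adjust to your setup:
#
#   RAILS_ENV=production bundle exec rake images:compress
#   RAILS_ENV=production bundle exec rake images:clean_orphans
#   RAILS_ENV=production bundle exec rake images:pull_hotlinked
#
# Note that the `download` helper fetches URLs through OpenURI's Kernel#open
# hook, which only works once `open-uri` is loaded (see the require at the top
# of this file) and was removed in Ruby 3.0, where the call would have to
# become `URI.open(url, "rb", read_timeout: 5)` instead.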