From 241bf484978a0c74a6a62e4ae2c035287ed2b0e8 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Sat, 13 Jan 2024 23:34:20 +0100 Subject: [PATCH] DEV: Allow rebakes to generate optimized images at the same time Previously only Sidekiq was allowed to generate more than one optimized image at the same time per machine. This adds an easy mechanism to allow the same in rake tasks and other tools. --- app/models/optimized_image.rb | 15 ++++++-- lib/tasks/import.rake | 53 +++++++++----------------- lib/tasks/posts.rake | 3 ++ script/bulk_import/uploads_importer.rb | 10 ++--- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/app/models/optimized_image.rb b/app/models/optimized_image.rb index 9eae6c2bf40..b3e390efcd9 100644 --- a/app/models/optimized_image.rb +++ b/app/models/optimized_image.rb @@ -14,15 +14,24 @@ class OptimizedImage < ActiveRecord::Base # this can very easily lead to runaway CPU so slowing it down is beneficial and it is hijacked # # we can not afford this blocking in Sidekiq cause it can lead to starvation - if Sidekiq.server? - DistributedMutex.synchronize("optimized_image_#{upload_id}_#{width}_#{height}") { yield } - else + if lock_per_machine? DistributedMutex.synchronize("optimized_image_host_#{@hostname}") do DistributedMutex.synchronize("optimized_image_#{upload_id}_#{width}_#{height}") { yield } end + else + DistributedMutex.synchronize("optimized_image_#{upload_id}_#{width}_#{height}") { yield } end end + def self.lock_per_machine? + return @lock_per_machine if defined?(@lock_per_machine) + @lock_per_machine = !Sidekiq.server? + end + + def self.lock_per_machine=(value) + @lock_per_machine = value + end + def self.create_for(upload, width, height, opts = {}) return if width <= 0 || height <= 0 return if upload.try(:sha1).blank? 
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index df49e09301d..d94a48d7ea0 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -720,63 +720,48 @@ desc "Rebake posts that contain polls" task "import:rebake_uncooked_posts_with_polls" => :environment do log "Rebaking posts with polls" - Jobs.run_immediately! + posts = Post.where("EXISTS (SELECT 1 FROM polls WHERE polls.post_id = posts.id)") - posts = - Post.where("EXISTS (SELECT 1 FROM polls WHERE polls.post_id = posts.id)").where( - "baked_version <> ? or baked_version IS NULL", - Post::BAKED_VERSION, - ) - - max_count = posts.count - current_count = 0 - - posts.find_each(order: :desc) do |post| - post.rebake! - current_count += 1 - print "\r%7d / %7d" % [current_count, max_count] - end + rebake_posts(posts) end desc "Rebake posts that contain events" task "import:rebake_uncooked_posts_with_events" => :environment do log "Rebaking posts with events" - Jobs.run_immediately! - posts = Post.where( "EXISTS (SELECT 1 FROM discourse_post_event_events WHERE discourse_post_event_events.id = posts.id)", - ).where("baked_version <> ? or baked_version IS NULL", Post::BAKED_VERSION) + ) - max_count = posts.count - current_count = 0 - - posts.find_each(order: :desc) do |post| - post.rebake! - current_count += 1 - print "\r%7d / %7d" % [current_count, max_count] - end + rebake_posts(posts) end desc "Rebake posts that have tag" task "import:rebake_uncooked_posts_with_tag", [:tag_name] => :environment do |_task, args| log "Rebaking posts with tag" - Jobs.run_immediately! - posts = Post.where( "EXISTS (SELECT 1 FROM topic_tags JOIN tags ON tags.id = topic_tags.tag_id WHERE topic_tags.topic_id = posts.topic_id AND tags.name = ?)", args[:tag_name], - ).where("baked_version <> ? or baked_version IS NULL", Post::BAKED_VERSION) + ) + + rebake_posts(posts) +end + +def rebake_posts(posts) + Jobs.run_immediately! 
+ OptimizedImage.lock_per_machine = false max_count = posts.count current_count = 0 - posts.find_each(order: :desc) do |post| - post.rebake! - current_count += 1 - print "\r%7d / %7d" % [current_count, max_count] - end + posts + .where("baked_version <> ? or baked_version IS NULL", Post::BAKED_VERSION) + .find_each(order: :desc) do |post| + post.rebake! + current_count += 1 + print "\r%7d / %7d" % [current_count, max_count] + end end diff --git a/lib/tasks/posts.rake b/lib/tasks/posts.rake index 16f2e84f16a..4caf1c485d3 100644 --- a/lib/tasks/posts.rake +++ b/lib/tasks/posts.rake @@ -13,6 +13,9 @@ task "posts:rebake_uncooked_posts" => :environment do # this rake task without worrying about your sidekiq imploding Jobs.run_immediately! + # don't lock per machine, we want to be able to run this from multiple consoles + OptimizedImage.lock_per_machine = false + ENV["RAILS_DB"] ? rebake_uncooked_posts : rebake_uncooked_posts_all_sites end diff --git a/script/bulk_import/uploads_importer.rb b/script/bulk_import/uploads_importer.rb index 99e1cd42cca..968c2310cc8 100644 --- a/script/bulk_import/uploads_importer.rb +++ b/script/bulk_import/uploads_importer.rb @@ -6,13 +6,6 @@ require "etc" require "sqlite3" require "colored2" -# hack so that OptimizedImage.lock beliefs that it's running in a Sidekiq job -module Sidekiq - def self.server? - true - end -end - module BulkImport class UploadsImporter TRANSACTION_SIZE = 1000 @@ -356,6 +349,9 @@ module BulkImport avatar_upload_ids = Set.new max_count = 0 + # allow more than 1 thread to optimize images at the same time + OptimizedImage.lock_per_machine = false + init_threads << Thread.new do query("SELECT id FROM optimized_images", @output_db).tap do |result_set| result_set.each { |row| optimized_upload_ids << row["id"] }