From f03d9cad06baafbd1d4028d0076fc2cbce277a0c Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Wed, 2 Nov 2016 11:14:02 +0800 Subject: [PATCH] PERF: `NOT IN` query is really inefficient for large tables. --- app/jobs/scheduled/clean_up_uploads.rb | 30 +++++++++---------- ...20161102024700_add_post_uploads_indexes.rb | 6 ++++ ...8_add_uploaded_avatar_id_index_to_users.rb | 5 ++++ ...20161102024838_add_user_avatars_indexes.rb | 6 ++++ ...0161102024900_add_user_profiles_indexes.rb | 6 ++++ .../20161102024920_add_categories_indexes.rb | 6 ++++ spec/jobs/clean_up_uploads_spec.rb | 10 +++++++ 7 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 db/migrate/20161102024700_add_post_uploads_indexes.rb create mode 100644 db/migrate/20161102024818_add_uploaded_avatar_id_index_to_users.rb create mode 100644 db/migrate/20161102024838_add_user_avatars_indexes.rb create mode 100644 db/migrate/20161102024900_add_user_profiles_indexes.rb create mode 100644 db/migrate/20161102024920_add_categories_indexes.rb diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb index 21f67f81f30..07aef08d580 100644 --- a/app/jobs/scheduled/clean_up_uploads.rb +++ b/app/jobs/scheduled/clean_up_uploads.rb @@ -5,31 +5,29 @@ module Jobs def execute(args) return unless SiteSetting.clean_up_uploads? - ignore_urls = [] - ignore_urls |= UserProfile.uniq.where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background) - ignore_urls |= UserProfile.uniq.where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background) - ignore_urls |= Category.uniq.where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url) - ignore_urls |= Category.uniq.where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url) - # Any URLs in site settings are fair game - ignore_urls |= [ + ignore_urls = [ SiteSetting.logo_url, SiteSetting.logo_small_url, SiteSetting.favicon_url, SiteSetting.apple_touch_icon_url ] - ids = [] - ids |= PostUpload.uniq.pluck(:upload_id) - ids |= User.uniq.where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id) - ids |= UserAvatar.uniq.where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id) - grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max - result = Upload.where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") - result = result.where("created_at < ?", grace_period.hour.ago) - result = result.where("id NOT IN (?)", ids) if !ids.empty? - result = result.where("url NOT IN (?)", ignore_urls) if !ignore_urls.empty? + result = Upload.where("uploads.retain_hours IS NULL OR uploads.created_at < current_timestamp - interval '1 hour' * uploads.retain_hours") + .where("uploads.created_at < ?", grace_period.hour.ago) + .joins("LEFT JOIN post_uploads pu ON pu.upload_id = uploads.id") + .joins("LEFT JOIN users u ON u.uploaded_avatar_id = uploads.id") + .joins("LEFT JOIN user_avatars ua ON (ua.gravatar_upload_id = uploads.id OR ua.custom_upload_id = uploads.id)") + .joins("LEFT JOIN user_profiles up ON up.profile_background = uploads.url OR up.card_background = uploads.url") + .joins("LEFT JOIN categories c ON c.logo_url = uploads.url OR c.background_url = uploads.url") + .where("pu.upload_id IS NULL") + .where("u.uploaded_avatar_id IS NULL") + .where("ua.gravatar_upload_id IS NULL AND ua.custom_upload_id IS NULL") + .where("up.profile_background IS NULL AND up.card_background IS NULL") + .where("c.logo_url IS NULL AND c.background_url IS NULL") + .where("uploads.url NOT IN (?)", ignore_urls) result.find_each do |upload| next if QueuedPost.where("raw LIKE '%#{upload.sha1}%'").exists? diff --git a/db/migrate/20161102024700_add_post_uploads_indexes.rb b/db/migrate/20161102024700_add_post_uploads_indexes.rb new file mode 100644 index 00000000000..c8f8ced7143 --- /dev/null +++ b/db/migrate/20161102024700_add_post_uploads_indexes.rb @@ -0,0 +1,6 @@ +class AddPostUploadsIndexes < ActiveRecord::Migration + def change + add_index :post_uploads, :post_id + add_index :post_uploads, :upload_id + end +end diff --git a/db/migrate/20161102024818_add_uploaded_avatar_id_index_to_users.rb b/db/migrate/20161102024818_add_uploaded_avatar_id_index_to_users.rb new file mode 100644 index 00000000000..2e5c6e66e17 --- /dev/null +++ b/db/migrate/20161102024818_add_uploaded_avatar_id_index_to_users.rb @@ -0,0 +1,5 @@ +class AddUploadedAvatarIdIndexToUsers < ActiveRecord::Migration + def change + add_index :users, :uploaded_avatar_id + end +end diff --git a/db/migrate/20161102024838_add_user_avatars_indexes.rb b/db/migrate/20161102024838_add_user_avatars_indexes.rb new file mode 100644 index 00000000000..ea1b26f7f78 --- /dev/null +++ b/db/migrate/20161102024838_add_user_avatars_indexes.rb @@ -0,0 +1,6 @@ +class AddUserAvatarsIndexes < ActiveRecord::Migration + def change + add_index :user_avatars, :custom_upload_id + add_index :user_avatars, :gravatar_upload_id + end +end diff --git a/db/migrate/20161102024900_add_user_profiles_indexes.rb b/db/migrate/20161102024900_add_user_profiles_indexes.rb new file mode 100644 index 00000000000..7a5ce85f26d --- /dev/null +++ b/db/migrate/20161102024900_add_user_profiles_indexes.rb @@ -0,0 +1,6 @@ +class AddUserProfilesIndexes < ActiveRecord::Migration + def change + add_index :user_profiles, :profile_background + add_index :user_profiles, :card_background + end +end diff --git a/db/migrate/20161102024920_add_categories_indexes.rb b/db/migrate/20161102024920_add_categories_indexes.rb new file mode 100644 index 00000000000..d5ad88f10d9 --- /dev/null +++ b/db/migrate/20161102024920_add_categories_indexes.rb @@ -0,0 +1,6 @@ +class AddCategoriesIndexes < ActiveRecord::Migration + def change + add_index :categories, :logo_url + add_index :categories, :background_url + end +end diff --git a/spec/jobs/clean_up_uploads_spec.rb b/spec/jobs/clean_up_uploads_spec.rb index 4341ec728ed..6e7c77cfc33 100644 --- a/spec/jobs/clean_up_uploads_spec.rb +++ b/spec/jobs/clean_up_uploads_spec.rb @@ -103,6 +103,16 @@ describe Jobs::CleanUpUploads do expect(Upload.find_by(id: upload.id)).to eq(upload) end + it "does not delete user custom upload" do + upload = fabricate_upload + Fabricate(:user, user_avatar: Fabricate(:user_avatar, custom_upload: upload)) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end + it "does not delete uploads in a queued post" do upload = fabricate_upload