2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2014-06-30 20:09:55 -04:00
|
|
|
module Jobs
  # if locale changes or search algorithm changes we may want to reindex stuff
  #
  # Each run re-indexes a bounded batch of topics, posts, categories, users
  # and tags whose search data is missing or out of date, then removes search
  # data belonging to empty posts and to posts/topics that were soft-deleted
  # longer ago than the cleanup grace period.
  class ReindexSearch < ::Jobs::Scheduled
    every 2.hours

    # How long a post/topic must have been soft-deleted before its search
    # data is removed.
    CLEANUP_GRACE_DURATION = 1.day

    # Snapshot taken once, when the class is loaded. It is kept only so that
    # existing references to the constant keep resolving; the cleanup queries
    # use #cleanup_cutoff instead, because a load-time timestamp goes stale in
    # a long-running process.
    CLEANUP_GRACE_PERIOD = CLEANUP_GRACE_DURATION.ago

    # Runs every rebuild step followed by both cleanup steps.
    #
    # @param args [Hash, nil] when this is a Hash with a truthy +:verbose+
    #   entry, progress messages are written with +puts+ for this run.
    # @return [nil]
    def execute(args)
      @verbose = true if args.is_a?(Hash) && args[:verbose]

      rebuild_problem_topics
      rebuild_problem_posts
      rebuild_problem_categories
      rebuild_problem_users
      rebuild_problem_tags
      clean_post_search_data
      clean_topic_search_data

      nil
    ensure
      # Reset even when a step raises so the flag never leaks into a later
      # call on the same instance.
      @verbose = nil
    end

    # Re-indexes categories whose search data has a stale locale or version.
    #
    # @param limit [Integer] maximum number of categories handled per call.
    def rebuild_problem_categories(limit: 500)
      category_ids = load_problem_category_ids(limit)
      verbose_puts "rebuilding #{category_ids.length} categories"
      reindex_by_ids(Category, category_ids)
    end

    # Re-indexes users whose search data has a stale locale or version.
    #
    # @param limit [Integer] maximum number of users handled per call.
    def rebuild_problem_users(limit: 10000)
      user_ids = load_problem_user_ids(limit)
      verbose_puts "rebuilding #{user_ids.length} users"
      reindex_by_ids(User, user_ids)
    end

    # Re-indexes topics whose search data has a stale locale or version.
    #
    # @param limit [Integer] maximum number of topics handled per call.
    def rebuild_problem_topics(limit: 10000)
      topic_ids = load_problem_topic_ids(limit)
      verbose_puts "rebuilding #{topic_ids.length} topics"
      reindex_by_ids(Topic, topic_ids)
    end

    # Re-indexes posts that have no up-to-date search data row.
    #
    # @param limit [Integer] maximum number of posts handled per call.
    # @param indexer [#index] object that receives +index(post, force: true)+
    #   for every post; defaults to SearchIndexer.
    # @param verbose [Boolean] print progress; falls back to the flag set by
    #   #execute when falsy.
    def rebuild_problem_posts(limit: 20000, indexer: SearchIndexer, verbose: false)
      post_ids = load_problem_post_ids(limit)
      verbose ||= @verbose

      puts "rebuilding #{post_ids.length} posts" if verbose

      reindexed = 0

      post_ids.each do |id|
        # could be deleted while iterating through batch
        post = Post.find_by(id: id)
        next unless post

        indexer.index(post, force: true)
        reindexed += 1

        puts "#{reindexed} posts reindexed" if verbose && (reindexed % 1000).zero?
      end
    end

    # Re-indexes tags whose search data has a stale locale or version.
    #
    # @param limit [Integer] maximum number of tags handled per call.
    def rebuild_problem_tags(limit: 10000)
      tag_ids = load_problem_tag_ids(limit)
      verbose_puts "rebuilding #{tag_ids.length} tags"
      reindex_by_ids(Tag, tag_ids)
    end

    private

    # Writes +message+ with +puts+ only while the verbose flag is set.
    def verbose_puts(message)
      puts message if @verbose
    end

    # Timestamp used by the cleanup queries, computed on every call so it
    # always reflects the current time minus the grace duration.
    def cleanup_cutoff
      CLEANUP_GRACE_DURATION.ago
    end

    # Looks up each id on +model+ one at a time and passes every record that
    # still exists to SearchIndexer with +force: true+. Ids whose record can
    # no longer be found are skipped.
    def reindex_by_ids(model, ids)
      ids.each do |id|
        record = model.find_by(id: id)
        SearchIndexer.index(record, force: true) if record
      end
    end

    # Deletes post search data for posts whose raw is empty, then for posts
    # that were soft-deleted (or whose topic was soft-deleted) at or before
    # the cleanup cutoff.
    def clean_post_search_data
      verbose_puts "cleaning up post search data"

      PostSearchData
        .joins("LEFT JOIN posts p ON p.id = post_search_data.post_id")
        .where("p.raw = ''")
        .delete_all

      # The INNER JOIN on topics means only rows whose post and topic both
      # still exist are matched by this statement.
      DB.exec(<<~SQL, deleted_at: cleanup_cutoff)
        DELETE FROM post_search_data
        WHERE post_id IN (
          SELECT post_id
          FROM post_search_data
          LEFT JOIN posts ON post_search_data.post_id = posts.id
          INNER JOIN topics ON posts.topic_id = topics.id
          WHERE (topics.deleted_at IS NOT NULL
          AND topics.deleted_at <= :deleted_at) OR (
            posts.deleted_at IS NOT NULL AND
            posts.deleted_at <= :deleted_at
          )
        )
      SQL
    end

    # Deletes topic search data for topics that were soft-deleted at or
    # before the cleanup cutoff.
    def clean_topic_search_data
      verbose_puts "cleaning up topic search data"

      DB.exec(<<~SQL, deleted_at: cleanup_cutoff)
        DELETE FROM topic_search_data
        WHERE topic_id IN (
          SELECT topic_id
          FROM topic_search_data
          INNER JOIN topics ON topic_search_data.topic_id = topics.id
          WHERE topics.deleted_at IS NOT NULL
          AND topics.deleted_at <= :deleted_at
        )
      SQL
    end

    # Returns up to +limit+ post ids, highest id first, for posts that have no
    # post_search_data row in the site's default locale with a version of at
    # least SearchIndexer::MIN_POST_REINDEX_VERSION. Deleted posts, posts in
    # deleted topics and posts with an empty raw are excluded.
    def load_problem_post_ids(limit)
      params = {
        locale: SiteSetting.default_locale,
        version: SearchIndexer::MIN_POST_REINDEX_VERSION,
        limit: limit
      }

      DB.query_single(<<~SQL, params)
        SELECT
          posts.id
        FROM posts
        JOIN topics ON topics.id = posts.topic_id
        LEFT JOIN post_search_data pd
          ON pd.locale = :locale
          AND pd.version >= :version
          AND pd.post_id = posts.id
        WHERE pd.post_id IS NULL
        AND posts.deleted_at IS NULL
        AND topics.deleted_at IS NULL
        AND posts.raw != ''
        ORDER BY posts.id DESC
        LIMIT :limit
      SQL
    end

    # Returns up to +limit+ category ids (ascending) whose search data row has
    # a locale other than the site default or a version other than
    # SearchIndexer::CATEGORY_INDEX_VERSION. Because the relation is built
    # with +joins+, categories without a search data row are not returned.
    def load_problem_category_ids(limit)
      Category.joins(:category_search_data)
        .where('category_search_data.locale != ?
                OR category_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::CATEGORY_INDEX_VERSION)
        .order('categories.id asc')
        .limit(limit)
        .pluck(:id)
    end

    # Returns up to +limit+ topic ids (descending) whose search data row has a
    # locale other than the site default or a version other than
    # SearchIndexer::TOPIC_INDEX_VERSION. Because the relation is built with
    # +joins+, topics without a search data row are not returned.
    def load_problem_topic_ids(limit)
      Topic.joins(:topic_search_data)
        .where('topic_search_data.locale != ?
                OR topic_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::TOPIC_INDEX_VERSION)
        .order('topics.id desc')
        .limit(limit)
        .pluck(:id)
    end

    # Returns up to +limit+ user ids (ascending) whose search data row has a
    # locale other than the site default or a version other than
    # SearchIndexer::USER_INDEX_VERSION. Because the relation is built with
    # +joins+, users without a search data row are not returned.
    def load_problem_user_ids(limit)
      User.joins(:user_search_data)
        .where('user_search_data.locale != ?
                OR user_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::USER_INDEX_VERSION)
        .order('users.id asc')
        .limit(limit)
        .pluck(:id)
    end

    # Returns up to +limit+ tag ids (ascending) whose search data row has a
    # locale other than the site default or a version other than
    # SearchIndexer::TAG_INDEX_VERSION. Because the relation is built with
    # +joins+, tags without a search data row are not returned.
    def load_problem_tag_ids(limit)
      Tag.joins(:tag_search_data)
        .where('tag_search_data.locale != ?
                OR tag_search_data.version != ?', SiteSetting.default_locale, SearchIndexer::TAG_INDEX_VERSION)
        .order('tags.id asc')
        .limit(limit)
        .pluck(:id)
    end
  end
end
|