FIX: Clean up `topic_search_data` of trashed topics.

This keeps the index and table smaller.
This commit is contained in:
Guo Xiang Tan 2019-04-08 16:51:39 +08:00
parent c4997ce85f
commit 108c231d1c
2 changed files with 38 additions and 4 deletions

View File

@ -3,6 +3,8 @@ module Jobs
class ReindexSearch < Jobs::Scheduled class ReindexSearch < Jobs::Scheduled
every 2.hours every 2.hours
# NOTE(review): `1.week.ago` is evaluated once, when this class body is loaded,
# so the cutoff stays fixed for as long as the class remains loaded — confirm
# that a load-time timestamp (rather than one computed per run) is intended.
CLEANUP_GRACE_PERIOD = 1.week.ago
def execute(args) def execute(args)
rebuild_problem_topics rebuild_problem_topics
rebuild_problem_posts rebuild_problem_posts
@ -10,6 +12,7 @@ module Jobs
rebuild_problem_users rebuild_problem_users
rebuild_problem_tags rebuild_problem_tags
clean_post_search_data clean_post_search_data
clean_topic_search_data
end end
def rebuild_problem_categories(limit: 500) def rebuild_problem_categories(limit: 500)
@ -67,7 +70,7 @@ module Jobs
.where("p.raw = ''") .where("p.raw = ''")
.delete_all .delete_all
DB.exec(<<~SQL, deleted_at: 1.week.ago) DB.exec(<<~SQL, deleted_at: CLEANUP_GRACE_PERIOD)
DELETE FROM post_search_data DELETE FROM post_search_data
WHERE post_id IN ( WHERE post_id IN (
SELECT post_id SELECT post_id
@ -80,6 +83,19 @@ module Jobs
SQL SQL
end end
# Deletes `topic_search_data` rows that belong to topics trashed on or before
# the grace-period cutoff (one week before the moment this method runs).
#
# The cutoff is computed on every call. CLEANUP_GRACE_PERIOD is assigned
# `1.week.ago` in the class body, so it is evaluated once when the class is
# loaded and would not advance between runs of this scheduled job.
def clean_topic_search_data
  DB.exec(<<~SQL, deleted_at: 1.week.ago)
    DELETE FROM topic_search_data
    WHERE topic_id IN (
      SELECT topic_id
      FROM topic_search_data
      INNER JOIN topics ON topic_search_data.topic_id = topics.id
      WHERE topics.deleted_at IS NOT NULL
      AND topics.deleted_at <= :deleted_at
    )
  SQL
end
def load_problem_post_ids(limit) def load_problem_post_ids(limit)
params = { params = {
locale: SiteSetting.default_locale, locale: SiteSetting.default_locale,

View File

@ -83,6 +83,24 @@ describe Jobs::ReindexSearch do
end end
describe '#execute' do describe '#execute' do
# Two topics are indexed; one is trashed with the clock frozen at the
# grace-period cutoff. After the job runs, only the untouched topic should
# still have a topic_search_data row.
it "should clean up topic_search_data of trashed topics" do
  trashed_topic = Fabricate(:post).topic
  kept_topic = Fabricate(:post).topic

  SearchIndexer.index(trashed_topic, force: true)
  SearchIndexer.index(kept_topic, force: true)

  freeze_time(described_class::CLEANUP_GRACE_PERIOD) { trashed_topic.trash! }

  expect { subject.execute({}) }.to change { TopicSearchData.count }.by(-1)
  expect(Topic.pluck(:id)).to contain_exactly(kept_topic.id)

  remaining_topic_ids = TopicSearchData.pluck(:topic_id)
  expect(remaining_topic_ids).to contain_exactly(
    kept_topic.topic_search_data.topic_id
  )
end
it( it(
"should clean up post_search_data of posts with empty raw or posts from " \ "should clean up post_search_data of posts with empty raw or posts from " \
"trashed topics" "trashed topics"
@ -96,18 +114,18 @@ describe Jobs::ReindexSearch do
post3.topic.trash! post3.topic.trash!
post4 = nil post4 = nil
freeze_time(1.week.ago) do freeze_time(described_class::CLEANUP_GRACE_PERIOD) do
post4 = Fabricate(:post) post4 = Fabricate(:post)
post4.topic.trash! post4.topic.trash!
end end
expect { subject.execute({}) }.to change { PostSearchData.count }.by(-2) expect { subject.execute({}) }.to change { PostSearchData.count }.by(-2)
expect(Post.all.pluck(:id)).to contain_exactly( expect(Post.pluck(:id)).to contain_exactly(
post.id, post2.id, post3.id, post4.id post.id, post2.id, post3.id, post4.id
) )
expect(PostSearchData.all.pluck(:post_id)).to contain_exactly( expect(PostSearchData.pluck(:post_id)).to contain_exactly(
post.post_search_data.post_id, post3.post_search_data.post_id post.post_search_data.post_id, post3.post_search_data.post_id
) )
end end