FIX: Don't attempt to reindex posts that have an empty raw.

If the post ids keep loading, we might end up in a situation where
we're always loading the same post ids over and over again without
indexing anything new.

Follow up to daeda80ada.
This commit is contained in:
Guo Xiang Tan 2019-04-02 07:12:39 +08:00
parent d5a61ab167
commit 3fc5dbb045
2 changed files with 54 additions and 14 deletions

View File

@ -12,7 +12,7 @@ module Jobs
clean_post_search_data clean_post_search_data
end end
def rebuild_problem_categories(limit = 500) def rebuild_problem_categories(limit: 500)
category_ids = load_problem_category_ids(limit) category_ids = load_problem_category_ids(limit)
category_ids.each do |id| category_ids.each do |id|
@ -21,7 +21,7 @@ module Jobs
end end
end end
def rebuild_problem_users(limit = 10000) def rebuild_problem_users(limit: 10000)
user_ids = load_problem_user_ids(limit) user_ids = load_problem_user_ids(limit)
user_ids.each do |id| user_ids.each do |id|
@ -30,7 +30,7 @@ module Jobs
end end
end end
def rebuild_problem_topics(limit = 10000) def rebuild_problem_topics(limit: 10000)
topic_ids = load_problem_topic_ids(limit) topic_ids = load_problem_topic_ids(limit)
topic_ids.each do |id| topic_ids.each do |id|
@ -39,18 +39,18 @@ module Jobs
end end
end end
def rebuild_problem_posts(limit = 20000) def rebuild_problem_posts(limit: 20000, indexer: SearchIndexer)
post_ids = load_problem_post_ids(limit) post_ids = load_problem_post_ids(limit)
post_ids.each do |id| post_ids.each do |id|
# could be deleted while iterating through batch # could be deleted while iterating through batch
if post = Post.find_by(id: id) if post = Post.find_by(id: id)
SearchIndexer.index(post, force: true) indexer.index(post, force: true)
end end
end end
end end
def rebuild_problem_tags(limit = 10000) def rebuild_problem_tags(limit: 10000)
tag_ids = load_problem_tag_ids(limit) tag_ids = load_problem_tag_ids(limit)
tag_ids.each do |id| tag_ids.each do |id|
@ -75,6 +75,7 @@ module Jobs
LEFT JOIN post_search_data pd ON pd.locale = ? AND pd.version = ? AND p2.id = pd.post_id LEFT JOIN post_search_data pd ON pd.locale = ? AND pd.version = ? AND p2.id = pd.post_id
WHERE pd.post_id IS NULL WHERE pd.post_id IS NULL
)', SiteSetting.default_locale, Search::INDEX_VERSION) )', SiteSetting.default_locale, Search::INDEX_VERSION)
.where("posts.raw != ''")
.limit(limit) .limit(limit)
.order('posts.id DESC') .order('posts.id DESC')
.pluck(:id) .pluck(:id)

View File

@ -29,6 +29,44 @@ describe Jobs::ReindexSearch do
end end
end end
describe 'rebuild_problem_posts' do
# Test double for the search indexer: it records every post it is asked to
# index on the class itself, so a spec can assert which posts were passed in.
class FakeIndexer
  # Records +post+ as having been indexed.
  #
  # @param post [Object] the post handed to the indexer
  # @param force [Object] accepted to mirror the caller's keyword; unused here
  # @return [Array] the list of posts recorded so far
  def self.index(post, force:)
    posts.push(post)
  end

  # @return [Array] posts recorded so far; an empty array when nothing has
  #   been indexed yet (never nil)
  def self.posts
    @posts ||= []
  end

  # Forgets all recorded posts. Safe to call even if +index+ was never
  # invoked, so it can be used unconditionally in a teardown hook.
  def self.reset
    posts.clear
  end
end
# FakeIndexer keeps its recorded posts in class-level state, so clear it
# after each example to stop posts leaking into the next one.
after do
FakeIndexer.reset
end
it 'should not reindex posts with empty raw' do
# Regular post whose search data is removed; the expectation below treats
# it as the one post that needs reindexing.
post = Fabricate(:post)
post.post_search_data.destroy!
# Small-action post with an empty raw. Validations are skipped on save
# (presumably because an empty raw would otherwise be rejected — confirm).
post2 = Fabricate.build(:post,
raw: "",
post_type: Post.types[:small_action]
)
post2.save!(validate: false)
# Inject the fake so the posts passed to the indexer are recorded.
subject.rebuild_problem_posts(indexer: FakeIndexer)
# Only the post with a non-empty raw should have been handed to the indexer.
expect(FakeIndexer.posts).to contain_exactly(post)
end
end
describe '#execute' do
it "should clean up post_search_data of posts with empty raw" do it "should clean up post_search_data of posts with empty raw" do
post = Fabricate(:post) post = Fabricate(:post)
post2 = Fabricate(:post, post_type: Post.types[:small_action]) post2 = Fabricate(:post, post_type: Post.types[:small_action])
@ -40,3 +78,4 @@ describe Jobs::ReindexSearch do
expect(PostSearchData.all).to contain_exactly(post.post_search_data) expect(PostSearchData.all).to contain_exactly(post.post_search_data)
end end
end end
end