FIX: rebuild index when engine replaced (#5021)

This commit is contained in:
Erick Guan 2017-08-16 13:38:34 +02:00 committed by Sam
parent b77aa29e71
commit 6e59149a77
7 changed files with 136 additions and 51 deletions

View File

@ -4,7 +4,75 @@ module Jobs
every 1.day
# Entry point of the job: calls each rebuild step listed below, in order.
# args is not read by this method.
def execute(args)
# NOTE(review): this text looks like a rendered diff with removed and added lines
# interleaved; the Search.rebuild_problem_posts call may be the pre-change line that
# the four rebuild_problem_* calls replace — confirm against the actual file.
Search.rebuild_problem_posts
rebuild_problem_topics
rebuild_problem_posts
rebuild_problem_categories
rebuild_problem_users
end
# Force-reindexes every category returned by load_problem_categories.
#
# limit - upper bound handed to load_problem_categories (default 10000).
#
# Returns whatever iterating the loaded collection with #each returns.
def rebuild_problem_categories(limit = 10000)
  load_problem_categories(limit).each do |stale_category|
    SearchIndexer.index(stale_category, force: true)
  end
end
# Force-reindexes every user returned by load_problem_users.
#
# limit - upper bound handed to load_problem_users (default 10000).
#
# Returns whatever iterating the loaded collection with #each returns.
def rebuild_problem_users(limit = 10000)
  load_problem_users(limit).each do |stale_user|
    SearchIndexer.index(stale_user, force: true)
  end
end
# Force-reindexes every topic returned by load_problem_topics.
#
# limit - upper bound handed to load_problem_topics (default 10000).
#
# Returns whatever iterating the loaded collection with #each returns.
def rebuild_problem_topics(limit = 10000)
  load_problem_topics(limit).each do |stale_topic|
    SearchIndexer.index(stale_topic, force: true)
  end
end
# Force-reindexes every post returned by load_problem_posts.
#
# limit - upper bound handed to load_problem_posts (default 10000).
#
# Returns whatever iterating the loaded collection with #each returns.
def rebuild_problem_posts(limit = 10000)
  load_problem_posts(limit).each do |stale_post|
    SearchIndexer.index(stale_post, force: true)
  end
end
private
# Builds the query for posts (joined to their topic) that have no post_search_data
# row matching both the current default locale and Search::INDEX_VERSION.
#
# limit - maximum number of posts the query may return.
#
# Returns the result of the chained joins/where/limit calls on Post.
def load_problem_posts(limit)
  # The LEFT JOIN only matches up-to-date rows, so "pd.post_id IS NULL" keeps posts
  # whose search data is either absent or built for another locale/version.
  missing_current_data = [
    'posts.id IN (',
    'SELECT p2.id FROM posts p2',
    'LEFT JOIN post_search_data pd ON pd.locale = ? AND pd.version = ? AND p2.id = pd.post_id',
    'WHERE pd.post_id IS NULL',
    ')'
  ].join("\n")

  Post
    .joins(:topic)
    .where(missing_current_data, SiteSetting.default_locale, Search::INDEX_VERSION)
    .limit(limit)
end
# Builds the query for categories whose search data row carries a locale other than
# the current default locale, or a version other than Search::INDEX_VERSION.
#
# limit - maximum number of categories the query may return.
#
# Returns the result of the chained joins/where/limit calls on Category.
#
# NOTE(review): joins(:category_search_data) is presumably an inner join, so a category
# with no search data row at all would not be selected here — confirm this is intended.
def load_problem_categories(limit)
  outdated_data = "category_search_data.locale != ?\nOR category_search_data.version != ?"

  Category
    .joins(:category_search_data)
    .where(outdated_data, SiteSetting.default_locale, Search::INDEX_VERSION)
    .limit(limit)
end
# Builds the query for topics whose search data row carries a locale other than
# the current default locale, or a version other than Search::INDEX_VERSION.
#
# limit - maximum number of topics the query may return.
#
# Returns the result of the chained joins/where/limit calls on Topic.
#
# NOTE(review): joins(:topic_search_data) is presumably an inner join, so a topic
# with no search data row at all would not be selected here — confirm this is intended.
def load_problem_topics(limit)
  outdated_data = "topic_search_data.locale != ?\nOR topic_search_data.version != ?"

  Topic
    .joins(:topic_search_data)
    .where(outdated_data, SiteSetting.default_locale, Search::INDEX_VERSION)
    .limit(limit)
end
# Builds the query for users whose search data row carries a locale other than
# the current default locale, or a version other than Search::INDEX_VERSION.
#
# limit - maximum number of users the query may return.
#
# Returns the result of the chained joins/where/limit calls on User.
#
# NOTE(review): joins(:user_search_data) is presumably an inner join, so a user
# with no search data row at all would not be selected here — confirm this is intended.
def load_problem_users(limit)
  outdated_data = "user_search_data.locale != ?\nOR user_search_data.version != ?"

  User
    .joins(:user_search_data)
    .where(outdated_data, SiteSetting.default_locale, Search::INDEX_VERSION)
    .limit(limit)
end
end
end

View File

@ -42,20 +42,23 @@ class SearchIndexer
SET
raw_data = :raw_data,
locale = :locale,
search_data = TO_TSVECTOR('#{stemmer}', :search_data)
search_data = TO_TSVECTOR('#{stemmer}', :search_data),
version = :version
WHERE #{foreign_key} = :id",
raw_data: raw_data,
search_data: search_data,
id: id,
locale: SiteSetting.default_locale)
locale: SiteSetting.default_locale,
version: Search::INDEX_VERSION)
if rows == 0
Post.exec_sql("INSERT INTO #{table_name}
(#{foreign_key}, search_data, locale, raw_data)
VALUES (:id, TO_TSVECTOR('#{stemmer}', :search_data), :locale, :raw_data)",
(#{foreign_key}, search_data, locale, raw_data, version)
VALUES (:id, TO_TSVECTOR('#{stemmer}', :search_data), :locale, :raw_data, :version)",
raw_data: raw_data,
search_data: search_data,
id: id,
locale: SiteSetting.default_locale)
locale: SiteSetting.default_locale,
version: Search::INDEX_VERSION)
end
rescue
# don't allow concurrency to mess up saving a post
@ -81,10 +84,10 @@ class SearchIndexer
update_index('category', category_id, name)
end
def self.index(obj)
def self.index(obj, force: false)
return if @disabled
if obj.class == Post && obj.cooked_changed?
if obj.class == Post && (obj.cooked_changed? || force)
if obj.topic
category_name = obj.topic.category.name if obj.topic.category
SearchIndexer.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name)
@ -93,11 +96,12 @@ class SearchIndexer
Rails.logger.warn("Orphan post skipped in search_indexer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
end
end
if obj.class == User && (obj.username_changed? || obj.name_changed?)
if obj.class == User && (obj.username_changed? || obj.name_changed? || force)
SearchIndexer.update_users_index(obj.id, obj.username_lower || '', obj.name ? obj.name.downcase : '')
end
if obj.class == Topic && obj.title_changed?
if obj.class == Topic && (obj.title_changed? || force)
if obj.posts
post = obj.posts.find_by(post_number: 1)
if post
@ -108,7 +112,7 @@ class SearchIndexer
end
end
if obj.class == Category && obj.name_changed?
if obj.class == Category && (obj.name_changed? || force)
SearchIndexer.update_categories_index(obj.id, obj.name)
end
end

View File

@ -0,0 +1,8 @@
# Adds an integer `version` column, defaulting to 0, to each of the four
# search data tables.
class AddVersionToSearchData < ActiveRecord::Migration
  def change
    # Same tables, in the same order, each receiving an identical column definition.
    %i[post_search_data topic_search_data category_search_data user_search_data].each do |table|
      add_column table, :version, :integer, default: 0
    end
  end
end

View File

@ -1,6 +1,7 @@
require_dependency 'search/grouped_search_results'
class Search
INDEX_VERSION = 1.freeze
def self.per_facet
5
@ -45,36 +46,6 @@ class Search
end
end
# Reindexes posts that lack search data for the current default locale.
#
# Two passes run one after the other, each capped at limit posts (default 10000):
#   1. posts with no post_search_data row for the locale;
#   2. first posts (post_number = 1) whose topic has no topic_search_data row
#      for the locale.
#
# Always returns nil.
def self.rebuild_problem_posts(limit = 10000)
  missing_post_data = [
    'posts.id IN (',
    'SELECT p2.id FROM posts p2',
    'LEFT JOIN post_search_data pd ON locale = ? AND p2.id = pd.post_id',
    'WHERE pd.post_id IS NULL',
    ')'
  ].join("\n")

  missing_topic_data = [
    'posts.id IN (',
    'SELECT p2.id FROM posts p2',
    'LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id',
    'WHERE pd.topic_id IS NULL AND p2.post_number = 1',
    ')'
  ].join("\n")

  [missing_post_data, missing_topic_data].each do |missing_data_sql|
    stale_posts = Post.joins(:topic).where(missing_data_sql, SiteSetting.default_locale).limit(limit)
    stale_posts.each do |stale_post|
      # Force indexing: appending a space changes cooked
      # (presumably so the indexer sees the post as modified — TODO confirm).
      stale_post.cooked += " "
      SearchIndexer.index(stale_post)
    end
  end

  nil
end
def self.prepare_data(search_data, purpose = :query)
data = search_data.squish
# TODO cppjieba_rb is designed for chinese, we need something else for Korean / Japanese

View File

@ -0,0 +1,31 @@
require 'rails_helper'
# Specs for the search reindex job: running it should bring stored search data
# back in line with the current default locale and Search::INDEX_VERSION.
describe Jobs::ReindexSearch do
# SearchIndexer is enabled before each example and disabled again afterwards.
before { SearchIndexer.enable }
after { SearchIndexer.disable }
let(:locale) { 'fr' }
# These examples work because the test database holds fewer records than the rebuild limit.
# `topic` is not covered: fabricated topics have no posts, and therefore no search data.
%w(post category user).each do |m|
it "should rebuild `#{m}` when default_locale changed" do
# Fabricate the record under 'en', then switch the default locale before running the job.
SiteSetting.default_locale = 'en'
model = Fabricate(m.to_sym)
SiteSetting.default_locale = locale
subject.execute({})
expect(model.send("#{m}_search_data").locale).to eq locale
end
it "should rebuild `#{m}` when INDEX_VERSION changed" do
model = Fabricate(m.to_sym)
# Set the stored search data to version 0 so that it no longer matches
# Search::INDEX_VERSION and can be reindexed.
search_data = model.send("#{m}_search_data")
search_data.update_attributes!(version: 0)
model.reload
subject.execute({})
expect(model.send("#{m}_search_data").version).to eq Search::INDEX_VERSION
end
end
end

View File

@ -1,30 +1,34 @@
require 'rails_helper'
describe SearchIndexer do
let(:post_id) { 99 }
it 'correctly indexes chinese' do
SiteSetting.default_locale = 'zh_CN'
data = "你好世界"
expect(data.split(" ").length).to eq(1)
SearchIndexer.update_posts_index(99, "你好世界", "", nil)
SearchIndexer.update_posts_index(post_id, "你好世界", "", nil)
raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0]
raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
expect(raw_data.split(' ').length).to eq(2)
end
it 'correctly indexes a post' do
it 'correctly indexes a post according to version' do
# Preparing so that they can be indexed to right version
SearchIndexer.update_posts_index(post_id, "dummy", "", nil)
PostSearchData.find_by(post_id: post_id).update_attributes!(version: -1)
data = "<a>This</a> is a test"
SearchIndexer.update_posts_index(post_id, data, "", nil)
SearchIndexer.update_posts_index(99, data, "", nil)
raw_data, locale = PostSearchData.where(post_id: 99).pluck(:raw_data, :locale)[0]
raw_data, locale, version = PostSearchData.where(post_id: post_id).pluck(:raw_data, :locale, :version)[0]
expect(raw_data).to eq("This is a test")
expect(locale).to eq("en")
expect(version).to eq(Search::INDEX_VERSION)
SearchIndexer.update_posts_index(99, "tester", "", nil)
SearchIndexer.update_posts_index(post_id, "tester", "", nil)
raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0]
raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
expect(raw_data).to eq("tester")
end
end

View File

@ -84,5 +84,4 @@ module Helpers
# Passes the path "emails/<email_name>.eml" to fixture_file and returns its result.
#
# email_name - name of the e-mail fixture, without directory or extension.
def email(email_name)
  relative_path = "emails/#{email_name}.eml"
  fixture_file(relative_path)
end
end