PERF: Add partial index for non-pm search.

This commit is contained in:
Guo Xiang Tan 2020-08-18 14:51:17 +08:00
parent 33c21c5921
commit 92b7fe4c62
No known key found for this signature in database
GPG Key ID: FBD110179AAC1F20
5 changed files with 143 additions and 16 deletions

View File

@ -85,6 +85,7 @@ class SearchIndexer
"search_data" => tsvector,
}
yield params if block_given?
table_name.camelize.constantize.upsert(params)
rescue => e
if Rails.env.test?
@ -111,7 +112,7 @@ class SearchIndexer
)
end
def self.update_posts_index(post_id, topic_title, category_name, topic_tags, cooked)
def self.update_posts_index(post_id:, topic_title:, category_name:, topic_tags:, cooked:, private_message:)
update_index(
table: 'post',
id: post_id,
@ -119,7 +120,9 @@ class SearchIndexer
b_weight: category_name,
c_weight: topic_tags,
d_weight: scrub_html_for_search(cooked)
)
) do |params|
params["private_message"] = private_message
end
end
def self.update_users_index(user_id, username, name)
@ -204,7 +207,15 @@ class SearchIndexer
)
if topic
SearchIndexer.update_posts_index(obj.id, topic.title, category_name, tag_names, obj.cooked)
SearchIndexer.update_posts_index(
post_id: obj.id,
topic_title: topic.title,
category_name: category_name,
topic_tags: tag_names,
cooked: obj.cooked,
private_message: topic.private_message?
)
SearchIndexer.update_topics_index(topic.id, topic.title, obj.cooked) if obj.is_first_post?
end
end
@ -216,7 +227,15 @@ class SearchIndexer
if Topic === obj && (obj.saved_change_to_title? || force)
if obj.posts
if post = obj.posts.find_by(post_number: 1)
SearchIndexer.update_posts_index(post.id, obj.title, category_name, tag_names, post.cooked)
SearchIndexer.update_posts_index(
post_id: post.id,
topic_title: obj.title,
category_name: category_name,
topic_tags: tag_names,
cooked: post.cooked,
private_message: obj.private_message?
)
SearchIndexer.update_topics_index(obj.id, obj.title, post.cooked)
end
end

View File

@ -0,0 +1,51 @@
# frozen_string_literal: true
class AddPrivateMessageToPostSearchData < ActiveRecord::Migration[6.0]
def up
add_column :post_search_data, :private_message, :boolean
# Delete post_search_data of orphaned posts
execute <<~SQL
DELETE FROM post_search_data
WHERE post_id IN (
SELECT posts.id
FROM posts
LEFT JOIN topics ON topics.id = posts.topic_id
WHERE topics.id IS NULL
)
SQL
# Delete orphaned post_search_data
execute <<~SQL
DELETE FROM post_search_data
WHERE post_id IN (
SELECT post_search_data.post_id
FROM post_search_data
LEFT JOIN posts ON posts.id = post_search_data.post_id
WHERE posts.id IS NULL
)
SQL
execute <<~SQL
UPDATE post_search_data
SET private_message = true
FROM posts
INNER JOIN topics ON topics.id = posts.topic_id AND topics.archetype = 'private_message'
WHERE posts.id = post_search_data.post_id
SQL
execute <<~SQL
UPDATE post_search_data
SET private_message = false
FROM posts
INNER JOIN topics ON topics.id = posts.topic_id AND topics.archetype <> 'private_message'
WHERE posts.id = post_search_data.post_id
SQL
change_column_null(:post_search_data, :private_message, false)
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true
class CreatePartialIndexOnPostSearchData < ActiveRecord::Migration[6.0]
disable_ddl_transaction!
def up
execute <<~SQL
CREATE INDEX CONCURRENTLY idx_regular_post_search_data ON post_search_data USING GIN(search_data) WHERE NOT private_message
SQL
end
def down
execute <<~SQL
DROP INDEX IF EXISTS idx_regular_post_search_data;
SQL
end
end

View File

@ -818,22 +818,37 @@ class Search
.joins("LEFT JOIN categories ON categories.id = topics.category_id")
is_topic_search = @search_context.present? && @search_context.is_a?(Topic)
posts = posts.where("topics.visible") unless is_topic_search
if type_filter === "private_messages" || (is_topic_search && @search_context.private_message?)
posts = posts.where("topics.archetype = ?", Archetype.private_message)
posts = posts
.where(
"topics.archetype = ? AND post_search_data.private_message",
Archetype.private_message
)
unless @guardian.is_admin?
posts = posts.private_posts_for_user(@guardian.user)
end
unless @guardian.is_admin?
posts = posts.private_posts_for_user(@guardian.user)
end
elsif type_filter === "all_topics"
private_posts = posts.where("topics.archetype = ?", Archetype.private_message)
private_posts = private_posts.private_posts_for_user(@guardian.user)
private_posts = posts
.where(
"topics.archetype = ? AND post_search_data.private_message",
Archetype.private_message
)
.private_posts_for_user(@guardian.user)
posts = posts.where("topics.archetype <> ?", Archetype.private_message).or(private_posts)
posts = posts
.where(
"topics.archetype <> ? AND NOT post_search_data.private_message",
Archetype.private_message
)
.or(private_posts)
else
posts = posts.where("topics.archetype <> ?", Archetype.private_message)
posts = posts.where(
"topics.archetype <> ? AND NOT post_search_data.private_message",
Archetype.private_message
)
end
if @term.present?
@ -1162,6 +1177,8 @@ class Search
query.includes(topic: topic_eager_loads)
end
private
# Limited for performance reasons since `TS_HEADLINE` is slow when the text
# document is too long.
MAX_LENGTH_FOR_HEADLINE = 2500

View File

@ -21,7 +21,14 @@ describe SearchIndexer do
SiteSetting.default_locale = 'zh_CN'
data = "你好世界"
SearchIndexer.update_posts_index(post_id, "", "", "", data)
SearchIndexer.update_posts_index(
post_id: post_id,
topic_title: "",
category_name: "",
topic_tags: "",
cooked: data,
private_message: false
)
post_search_data = PostSearchData.find_by(post_id: post_id)
@ -95,11 +102,27 @@ describe SearchIndexer do
it 'correctly indexes a post according to version' do
# Preparing so that they can be indexed to right version
SearchIndexer.update_posts_index(post_id, "dummy", "", nil, nil)
SearchIndexer.update_posts_index(
post_id: post_id,
topic_title: "dummy",
category_name: "",
topic_tags: nil,
cooked: nil,
private_message: false
)
PostSearchData.find_by(post_id: post_id).update!(version: -1)
data = "<a>This</a> is a test"
SearchIndexer.update_posts_index(post_id, "", "", nil, data)
SearchIndexer.update_posts_index(
post_id: post_id,
topic_title: "",
category_name: "",
topic_tags: nil,
cooked: data,
private_message: false
)
raw_data, locale, version = PostSearchData.where(post_id: post_id).pluck(:raw_data, :locale, :version)[0]
expect(raw_data).to eq("This is a test")