PERF: Create a partial regular post_search_data index on large sites.

With the addition of `PostSearchData#private_message`, a partial
index consisting of only search data from regular posts can be created.
The partial index helps to speed up searches on large sites since PG
will not have to do an index scan on the entire search data index which
has shown to be a bottle neck.
This commit is contained in:
Guo Xiang Tan 2020-08-21 16:16:28 +08:00 committed by Alan Guo Xiang Tan
parent f7314f8ab4
commit 40c6d90df3
5 changed files with 126 additions and 17 deletions

View File

@ -0,0 +1,50 @@
# frozen_string_literal: true
module Jobs
class CreateRecentPostSearchIndexes < ::Jobs::Scheduled
every 1.day
REGULAR_POST_SEARCH_DATA_INDEX_NAME = 'idx_recent_regular_post_search_data'
def execute(_)
create_recent_regular_post_search_index
end
private
def create_recent_regular_post_search_index
if !PostSearchData.where(private_message: false).offset(SiteSetting.search_enable_recent_regular_posts_offset_size - 1).limit(1).exists?
return
end
SiteSetting.search_recent_regular_posts_offset_post_id = self.regular_offset_post_id
DB.exec(<<~SQL)
DROP INDEX IF EXISTS temp_idx_recent_regular_post_search_data;
SQL
DB.exec(<<~SQL, post_id: SiteSetting.search_recent_regular_posts_offset_post_id)
CREATE INDEX #{Rails.env.test? ? '' : 'CONCURRENTLY'} temp_idx_recent_regular_post_search_data
ON post_search_data USING GIN(search_data)
WHERE NOT private_message AND post_id >= :post_id
SQL
DB.exec(<<~SQL)
#{Rails.env.test? ? '' : "BEGIN;"}
DROP INDEX IF EXISTS #{REGULAR_POST_SEARCH_DATA_INDEX_NAME};
ALTER INDEX temp_idx_recent_regular_post_search_data RENAME TO #{REGULAR_POST_SEARCH_DATA_INDEX_NAME};
#{Rails.env.test? ? '' : "COMMIT;"}
SQL
end
def regular_offset_post_id
PostSearchData
.order('post_id DESC')
.where(private_message: false)
.offset(SiteSetting.search_recent_posts_size - 1)
.limit(1)
.pluck(:post_id)
.first
end
end
end

View File

@ -1824,6 +1824,12 @@ search:
search_recent_posts_size:
default: 100000
max: 100000
search_recent_regular_posts_offset_post_id:
default: 0
hidden: true
search_enable_recent_regular_posts_offset_size:
default: 200000
hidden: true
log_search_queries:
client: true
default: true

View File

@ -1090,7 +1090,18 @@ class Search
type_filter: opts[:type_filter]
}
min_id = Search.min_post_id
min_id =
if SiteSetting.search_recent_regular_posts_offset_post_id > 0
if %w{all_topics private_message}.include?(opts[:type_filter])
0
else
SiteSetting.search_recent_regular_posts_offset_post_id
end
else
# This is kept around for backwards compatibility.
# TODO: Drop this code path after Discourse 2.7 has been released.
Search.min_post_id
end
if @order == :likes
# likes are a pain to aggregate so skip

View File

@ -543,27 +543,24 @@ describe Search do
])
end
it "allows the configuration of search to prefer recent posts" do
SiteSetting.search_prefer_recent_posts = true
SiteSetting.search_recent_posts_size = 1
post = Fabricate(:post, topic: topic, raw: "this is a play post")
it "is able to search with an offset when configured" do
post_1 = Fabricate(:post, raw: "this is a play post")
SiteSetting.search_recent_regular_posts_offset_post_id = post_1.id + 1
results = Search.execute('play post')
expect(results.posts).to eq([post_1])
post_2 = Fabricate(:post, raw: "this is another play post")
SiteSetting.search_recent_regular_posts_offset_post_id = post_2.id
results = Search.execute('play post')
expect(results.posts.map(&:id)).to eq([
post.id
post_2.id,
post_1.id
])
post2 = Fabricate(:post, raw: "this is a play post")
results = Search.execute('play post')
expect(results.posts.map(&:id)).to eq([
post2.id,
post.id
])
ensure
Discourse.cache.clear
end
it 'allows staff to search for whispers' do

View File

@ -0,0 +1,45 @@
# frozen_string_literal: true
require 'rails_helper'
describe Jobs::CreateRecentPostSearchIndexes do
subject { described_class.new }
fab!(:post) do
SearchIndexer.enable
Fabricate(:post)
end
fab!(:post_2) do
SearchIndexer.enable
Fabricate(:post)
end
before do
SearchIndexer.enable
end
describe '#execute' do
it 'should not create the index if requried posts size has not been reached' do
SiteSetting.search_recent_posts_size = 1
SiteSetting.search_enable_recent_regular_posts_offset_size = 3
expect do
subject.execute({})
end.to_not change { SiteSetting.search_recent_regular_posts_offset_post_id }
end
it 'should create the right index' do
SiteSetting.search_recent_posts_size = 1
SiteSetting.search_enable_recent_regular_posts_offset_size = 1
subject.execute({})
expect(SiteSetting.search_recent_regular_posts_offset_post_id).to eq(post_2.id)
expect(DB.query_single(<<~SQL).first).to eq(1)
SELECT 1 FROM pg_indexes WHERE indexname = '#{described_class::REGULAR_POST_SEARCH_DATA_INDEX_NAME}'
SQL
end
end
end