FEATURE: An option to search more recent posts for very large sites.

On very large forums, searching posts can be slow, so this commit
introduces the ability to search only the most recent posts first,
and then fall back to a broader search if there aren't enough
results.

Enable `search_prefer_recent_posts` to turn this on, and use
`search_recent_posts_size` to customize how many recent posts are searched first.
This commit is contained in:
Robin Ward 2016-08-10 15:40:58 -04:00
parent 25a14fcba8
commit fc311dbe3b
5 changed files with 57 additions and 18 deletions

View File

@ -828,6 +828,8 @@ en:
min_private_message_title_length: "Minimum allowed title length for a message in characters"
min_search_term_length: "Minimum valid search term length in characters"
search_tokenize_chinese_japanese_korean: "Force search to tokenize Chinese/Japanese/Korean even on non CJK sites"
search_prefer_recent_posts: "If searching your large forum is slow, this option tries an index of more recent posts first"
search_recent_posts_size: "How many recent posts to keep in the index"
allow_uncategorized_topics: "Allow topics to be created without a category. WARNING: If there are any uncategorized topics, you must recategorize them before turning this off."
allow_duplicate_topic_titles: "Allow topics with identical, duplicate titles."
unique_posts_mins: "How many minutes before a user can make a post with the same content again"

View File

@ -1047,6 +1047,9 @@ uncategorized:
default: 3
search_tokenize_chinese_japanese_korean: false
search_prefer_recent_posts: false
search_recent_posts_size: 100000
max_similar_results: 5
minimum_topics_similar: 50

View File

@ -128,6 +128,24 @@ class Search
end
end
# Computes, without any caching, the id of the oldest post that still falls
# inside the "recent posts" search window.
#
# Returns 0 (meaning "no lower bound") when
# SiteSetting.search_prefer_recent_posts? is off, when there are no more posts
# than SiteSetting.search_recent_posts_size, or when the boundary row cannot
# be found; otherwise returns the id of the post sitting at that boundary.
def self.min_post_id_no_cache
  return 0 unless SiteSetting.search_prefer_recent_posts?

  offset = Post.count - SiteSetting.search_recent_posts_size
  return 0 if offset <= 0

  # The lookup comes back empty when the offset runs past the last row (a
  # window size of 0 or less, or rows disappearing after the count was taken).
  # Fall back to 0 instead of nil so the result is always an Integer.
  Post.order(:id).offset(offset).limit(1).pluck(:id).first || 0
end
# Cached front end for min_post_id_no_cache.
#
# Returns 0 straight away when SiteSetting.search_prefer_recent_posts? is off.
# Otherwise the value is read through Rails.cache under a key that includes
# the current SiteSetting.search_recent_posts_size, expiring after one day.
# The opts argument is accepted but not used.
def self.min_post_id(opts=nil)
  if SiteSetting.search_prefer_recent_posts?
    window_size = SiteSetting.search_recent_posts_size
    cache_key = "search-min-post-id:#{window_size}"

    # Counting every post can be slow, hence the cache.
    Rails.cache.fetch(cache_key, expires_in: 1.day) { min_post_id_no_cache }
  else
    0
  end
end
attr_accessor :term
def initialize(term, opts=nil)
@ -545,7 +563,16 @@ class Search
posts = posts.joins('JOIN users u ON u.id = posts.user_id')
posts = posts.where("posts.raw || ' ' || u.username || ' ' || COALESCE(u.name, '') ilike ?", "%#{term_without_quote}%")
else
posts = posts.where("post_search_data.search_data @@ #{ts_query}")
min_id = Search.min_post_id
if min_id > 0
fast_query = posts.dup.where("post_search_data.post_id >= #{min_id}")
posts = fast_query if fast_query.dup.count >= 20
end
exact_terms = @term.scan(/"([^"]+)"/).flatten
exact_terms.each do |exact|
posts = posts.where("posts.raw ilike ?", "%#{exact}%")
@ -669,7 +696,7 @@ class Search
.to_sql
else
posts_query(@limit, aggregate_search: true,
private_messages: opts[:private_messages])
private_messages: opts[:private_messages])
.select('topics.id', "#{min_or_max}(post_number) post_number")
.group('topics.id')
.to_sql

View File

@ -644,4 +644,21 @@ describe Search do
end
end
context "#min_post_id" do
  # Both examples call the uncached variant directly.
  it "returns 0 when prefer_recent_posts is disabled" do
    SiteSetting.search_prefer_recent_posts = false

    boundary = Search.min_post_id_no_cache
    expect(boundary).to eq(0)
  end

  it "returns a value when prefer_recent_posts is enabled" do
    SiteSetting.search_prefer_recent_posts = true
    SiteSetting.search_recent_posts_size = 1

    # Two posts with a window of one: the second post is the boundary.
    Fabricate(:post)
    newest_post = Fabricate(:post)

    boundary = Search.min_post_id_no_cache
    expect(boundary).to eq(newest_post.id)
  end
end
end

View File

@ -2,14 +2,6 @@ require 'rails_helper'
describe SearchObserver do
# Spec helper: runs a select against post_search_data for the given post_id
# through SqlBuilder.map_exec (mapping to OpenStruct) and returns the first
# element of the result.
def get_row(post_id)
  sql = "select * from post_search_data where post_id = :post_id"
  rows = SqlBuilder.map_exec(OpenStruct, sql, post_id: post_id)
  rows.first
end
it 'correctly indexes chinese' do
SiteSetting.default_locale = 'zh_CN'
data = "你好世界"
@ -17,8 +9,8 @@ describe SearchObserver do
SearchObserver.update_posts_index(99, "你好世界", "", nil)
row = get_row(99)
expect(row.raw_data.split(' ').length).to eq(2)
raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0]
expect(raw_data.split(' ').length).to eq(2)
end
it 'correctly indexes a post' do
@ -26,15 +18,13 @@ describe SearchObserver do
SearchObserver.update_posts_index(99, data, "", nil)
row = get_row(99)
expect(row.raw_data).to eq("This is a test")
expect(row.locale).to eq("en")
raw_data, locale = PostSearchData.where(post_id: 99).pluck(:raw_data, :locale)[0]
expect(raw_data).to eq("This is a test")
expect(locale).to eq("en")
SearchObserver.update_posts_index(99, "tester", "", nil)
row = get_row(99)
expect(row.raw_data).to eq("tester")
raw_data = PostSearchData.where(post_id: 99).pluck(:raw_data)[0]
expect(raw_data).to eq("tester")
end
end