PERF: new table used for title similarity search

This commit is contained in:
Sam 2014-08-08 15:50:26 +10:00
parent cd20c8e55f
commit c1cbf1b269
5 changed files with 45 additions and 4 deletions

View File

@ -35,6 +35,11 @@ class SearchObserver < ActiveRecord::Observer
# don't allow concurrency to mess up saving a post # don't allow concurrency to mess up saving a post
end end
# Builds the search text for a topic and stores it through update_index
# under the 'topic' type.
#
# The text is the title, a single space, and the scrubbed form of +cooked+
# cut down to at most Topic::MAX_SIMILAR_BODY_LENGTH characters.
#
# topic_id - id passed through to update_index
# title    - topic title; copied first so the caller's string is not mutated
# cooked   - post HTML, passed through scrub_html_for_search before truncation
#
# Returns whatever update_index returns.
def self.update_topics_index(topic_id, title, cooked)
  # Work on a copy: the appends below modify the receiver in place.
  search_data = title.dup
  search_data << " "
  search_data << scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]

  update_index('topic', topic_id, search_data)
end
def self.update_posts_index(post_id, cooked, title, category) def self.update_posts_index(post_id, cooked, title, category)
search_data = scrub_html_for_search(cooked) << " " << title search_data = scrub_html_for_search(cooked) << " " << title
search_data << " " << category if category search_data << " " << category if category
@ -55,6 +60,7 @@ class SearchObserver < ActiveRecord::Observer
if obj.topic if obj.topic
category_name = obj.topic.category.name if obj.topic.category category_name = obj.topic.category.name if obj.topic.category
SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name) SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name)
SearchObserver.update_topics_index(obj.topic_id, obj.topic.title, obj.cooked) if obj.post_number == 1
else else
Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}") Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
end end
@ -69,6 +75,7 @@ class SearchObserver < ActiveRecord::Observer
if post if post
category_name = obj.category.name if obj.category category_name = obj.category.name if obj.category
SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name) SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name)
SearchObserver.update_topics_index(obj.id, obj.title, post.cooked)
end end
end end
end end

View File

@ -358,12 +358,13 @@ class Topic < ActiveRecord::Base
archetype == Archetype.private_message archetype == Archetype.private_message
end end
MAX_SIMILAR_BODY_LENGTH = 200
# Search for similar topics # Search for similar topics
def self.similar_to(title, raw, user=nil) def self.similar_to(title, raw, user=nil)
return [] unless title.present? return [] unless title.present?
return [] unless raw.present? return [] unless raw.present?
filter_words = Search.prepare_data(title + " " + raw[0...200]); filter_words = Search.prepare_data(title + " " + raw[0...MAX_SIMILAR_BODY_LENGTH]);
ts_query = Search.ts_query(filter_words, nil, "|") ts_query = Search.ts_query(filter_words, nil, "|")
# Exclude category definitions from similar topic suggestions # Exclude category definitions from similar topic suggestions
@ -371,8 +372,7 @@ class Topic < ActiveRecord::Base
candidates = Topic.visible candidates = Topic.visible
.secured(Guardian.new(user)) .secured(Guardian.new(user))
.listable_topics .listable_topics
.joins('JOIN posts p ON p.topic_id = topics.id AND p.post_number = 1') .joins('JOIN topic_search_data s ON topics.id = s.topic_id')
.joins('JOIN post_search_data s ON p.id = s.post_id')
.where("search_data @@ #{ts_query}") .where("search_data @@ #{ts_query}")
.order("ts_rank(search_data, #{ts_query}) DESC") .order("ts_rank(search_data, #{ts_query}) DESC")
.limit(SiteSetting.max_similar_results * 3) .limit(SiteSetting.max_similar_results * 3)

View File

@ -0,0 +1,17 @@
# Adds the topic_search_data table, which is used for similarity search.
class CreateTopicSearchIndex < ActiveRecord::Migration
  # Creates the table without the default id column (topic_id is declared as
  # the primary key instead), then adds a GIN index over the tsvector column.
  def up
    create_table(:topic_search_data, id: false) do |table|
      table.integer(:topic_id, null: false, primary_key: true)
      table.text(:raw_data)
      table.string(:locale, null: false)
      table.tsvector(:search_data)
    end

    # Issued as raw SQL so the index method (gin) is spelled out explicitly.
    index_sql = "CREATE INDEX idx_search_topic ON topic_search_data USING gin (search_data)"
    execute(index_sql)
  end

  # Reverts the migration by dropping the table.
  def down
    drop_table(:topic_search_data)
  end
end

View File

@ -55,6 +55,19 @@ class Search
SearchObserver.index(post) SearchObserver.index(post)
end end
posts = Post.joins(:topic)
.where('posts.id IN (
SELECT p2.id FROM posts p2
LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id
WHERE pd.topic_id IS NULL AND p2.post_number = 1
)', SiteSetting.default_locale).limit(10000)
posts.each do |post|
# force indexing
post.cooked += " "
SearchObserver.index(post)
end
nil nil
end end

View File

@ -3,7 +3,7 @@ task "search:reindex" => :environment do
puts "Reindexing #{db}" puts "Reindexing #{db}"
puts "" puts ""
puts "Posts:" puts "Posts:"
Post.exec_sql("select p.id, p.cooked, c.name category, t.title from Post.exec_sql("select p.id, p.cooked, c.name category, t.title, p.post_number, t.id topic_id from
posts p posts p
join topics t on t.id = p.topic_id join topics t on t.id = p.topic_id
left join categories c on c.id = t.category_id left join categories c on c.id = t.category_id
@ -12,7 +12,11 @@ task "search:reindex" => :environment do
cooked = p["cooked"] cooked = p["cooked"]
title = p["title"] title = p["title"]
category = p["cat"] category = p["cat"]
post_number = p["post_number"].to_i
topic_id = p["topic_id"].to_i
SearchObserver.update_posts_index(post_id, cooked, title, category) SearchObserver.update_posts_index(post_id, cooked, title, category)
SearchObserver.update_topics_index(topic_id, title , cooked) if post_number == 1
putc "." putc "."
end end