PERF: new table used for title similarity search
This commit is contained in:
parent
cd20c8e55f
commit
c1cbf1b269
|
@ -35,6 +35,11 @@ class SearchObserver < ActiveRecord::Observer
|
|||
# don't allow concurrency to mess up saving a post
|
||||
end
|
||||
|
||||
# Builds the search text for a topic and hands it to update_index under the
# 'topic' type.
#
# The text is the title, a single space, then the first
# Topic::MAX_SIMILAR_BODY_LENGTH characters of scrub_html_for_search(cooked).
#
# topic_id - id passed through to update_index
# title    - topic title (copied before appending, so the caller's string is
#            left untouched)
# cooked   - cooked post body; passed to scrub_html_for_search before slicing
def self.update_topics_index(topic_id, title, cooked)
  # Work on a copy: String#<< mutates its receiver.
  search_data = title.dup
  search_data << " "
  search_data << scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]

  update_index('topic', topic_id, search_data)
end
|
||||
|
||||
def self.update_posts_index(post_id, cooked, title, category)
|
||||
search_data = scrub_html_for_search(cooked) << " " << title
|
||||
search_data << " " << category if category
|
||||
|
@ -55,6 +60,7 @@ class SearchObserver < ActiveRecord::Observer
|
|||
if obj.topic
|
||||
category_name = obj.topic.category.name if obj.topic.category
|
||||
SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name)
|
||||
SearchObserver.update_topics_index(obj.topic_id, obj.topic.title, obj.cooked) if obj.post_number == 1
|
||||
else
|
||||
Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
|
||||
end
|
||||
|
@ -69,6 +75,7 @@ class SearchObserver < ActiveRecord::Observer
|
|||
if post
|
||||
category_name = obj.category.name if obj.category
|
||||
SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name)
|
||||
SearchObserver.update_topics_index(obj.id, obj.title, post.cooked)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -358,12 +358,13 @@ class Topic < ActiveRecord::Base
|
|||
archetype == Archetype.private_message
|
||||
end
|
||||
|
||||
MAX_SIMILAR_BODY_LENGTH = 200
|
||||
# Search for similar topics
|
||||
def self.similar_to(title, raw, user=nil)
|
||||
return [] unless title.present?
|
||||
return [] unless raw.present?
|
||||
|
||||
filter_words = Search.prepare_data(title + " " + raw[0...200]);
|
||||
filter_words = Search.prepare_data(title + " " + raw[0...MAX_SIMILAR_BODY_LENGTH]);
|
||||
ts_query = Search.ts_query(filter_words, nil, "|")
|
||||
|
||||
# Exclude category definitions from similar topic suggestions
|
||||
|
@ -371,8 +372,7 @@ class Topic < ActiveRecord::Base
|
|||
candidates = Topic.visible
|
||||
.secured(Guardian.new(user))
|
||||
.listable_topics
|
||||
.joins('JOIN posts p ON p.topic_id = topics.id AND p.post_number = 1')
|
||||
.joins('JOIN post_search_data s ON p.id = s.post_id')
|
||||
.joins('JOIN topic_search_data s ON topics.id = s.topic_id')
|
||||
.where("search_data @@ #{ts_query}")
|
||||
.order("ts_rank(search_data, #{ts_query}) DESC")
|
||||
.limit(SiteSetting.max_similar_results * 3)
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# Migration that adds the topic_search_data table (used for similarity
# search) together with a GIN index on its tsvector column.
class CreateTopicSearchIndex < ActiveRecord::Migration
  # Creates topic_search_data keyed by topic_id (no auto-generated id column)
  # and then indexes search_data with GIN.
  def up
    # used for similarity search
    create_table(:topic_search_data, id: false) do |table|
      table.integer(:topic_id, null: false, primary_key: true)
      table.text(:raw_data)
      table.string(:locale, null: false)
      table.tsvector(:search_data)
    end

    execute("CREATE INDEX idx_search_topic ON topic_search_data USING gin (search_data)")
  end

  # Reverts the migration by dropping the table.
  def down
    drop_table(:topic_search_data)
  end
end
|
|
@ -55,6 +55,19 @@ class Search
|
|||
SearchObserver.index(post)
|
||||
end
|
||||
|
||||
posts = Post.joins(:topic)
|
||||
.where('posts.id IN (
|
||||
SELECT p2.id FROM posts p2
|
||||
LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id
|
||||
WHERE pd.topic_id IS NULL AND p2.post_number = 1
|
||||
)', SiteSetting.default_locale).limit(10000)
|
||||
|
||||
posts.each do |post|
|
||||
# force indexing
|
||||
post.cooked += " "
|
||||
SearchObserver.index(post)
|
||||
end
|
||||
|
||||
nil
|
||||
end
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ task "search:reindex" => :environment do
|
|||
puts "Reindexing #{db}"
|
||||
puts ""
|
||||
puts "Posts:"
|
||||
Post.exec_sql("select p.id, p.cooked, c.name category, t.title from
|
||||
Post.exec_sql("select p.id, p.cooked, c.name category, t.title, p.post_number, t.id topic_id from
|
||||
posts p
|
||||
join topics t on t.id = p.topic_id
|
||||
left join categories c on c.id = t.category_id
|
||||
|
@ -12,7 +12,11 @@ task "search:reindex" => :environment do
|
|||
cooked = p["cooked"]
|
||||
title = p["title"]
|
||||
category = p["cat"]
|
||||
post_number = p["post_number"].to_i
|
||||
topic_id = p["topic_id"].to_i
|
||||
|
||||
SearchObserver.update_posts_index(post_id, cooked, title, category)
|
||||
SearchObserver.update_topics_index(topic_id, title , cooked) if post_number == 1
|
||||
|
||||
putc "."
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue