PERF: new table used for title similarity search
This commit is contained in:
parent
cd20c8e55f
commit
c1cbf1b269
|
@ -35,6 +35,11 @@ class SearchObserver < ActiveRecord::Observer
|
||||||
# don't allow concurrency to mess up saving a post
|
# don't allow concurrency to mess up saving a post
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Builds the text that is indexed for a topic and passes it to update_index
# under the 'topic' type.
#
# topic_id - id forwarded unchanged to update_index
# title    - topic title; placed first in the indexed text
# cooked   - post body, run through scrub_html_for_search before use
#            (presumably the rendered HTML of the first post - confirm against callers)
def self.update_topics_index(topic_id, title, cooked)
|
||||||
|
# title.dup keeps << from mutating the caller's title string. The slice applies
# only to the scrubbed body (indexing binds tighter than <<), limiting it to
# Topic::MAX_SIMILAR_BODY_LENGTH characters; the title itself is never truncated.
search_data = title.dup << " " << scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]
|
||||||
|
update_index('topic', topic_id, search_data)
|
||||||
|
end
|
||||||
|
|
||||||
def self.update_posts_index(post_id, cooked, title, category)
|
def self.update_posts_index(post_id, cooked, title, category)
|
||||||
search_data = scrub_html_for_search(cooked) << " " << title
|
search_data = scrub_html_for_search(cooked) << " " << title
|
||||||
search_data << " " << category if category
|
search_data << " " << category if category
|
||||||
|
@ -55,6 +60,7 @@ class SearchObserver < ActiveRecord::Observer
|
||||||
if obj.topic
|
if obj.topic
|
||||||
category_name = obj.topic.category.name if obj.topic.category
|
category_name = obj.topic.category.name if obj.topic.category
|
||||||
SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name)
|
SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name)
|
||||||
|
SearchObserver.update_topics_index(obj.topic_id, obj.topic.title, obj.cooked) if obj.post_number == 1
|
||||||
else
|
else
|
||||||
Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
|
Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}")
|
||||||
end
|
end
|
||||||
|
@ -69,6 +75,7 @@ class SearchObserver < ActiveRecord::Observer
|
||||||
if post
|
if post
|
||||||
category_name = obj.category.name if obj.category
|
category_name = obj.category.name if obj.category
|
||||||
SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name)
|
SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name)
|
||||||
|
SearchObserver.update_topics_index(obj.id, obj.title, post.cooked)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -358,12 +358,13 @@ class Topic < ActiveRecord::Base
|
||||||
archetype == Archetype.private_message
|
archetype == Archetype.private_message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
MAX_SIMILAR_BODY_LENGTH = 200
|
||||||
# Search for similar topics
|
# Search for similar topics
|
||||||
def self.similar_to(title, raw, user=nil)
|
def self.similar_to(title, raw, user=nil)
|
||||||
return [] unless title.present?
|
return [] unless title.present?
|
||||||
return [] unless raw.present?
|
return [] unless raw.present?
|
||||||
|
|
||||||
filter_words = Search.prepare_data(title + " " + raw[0...200]);
|
filter_words = Search.prepare_data(title + " " + raw[0...MAX_SIMILAR_BODY_LENGTH]);
|
||||||
ts_query = Search.ts_query(filter_words, nil, "|")
|
ts_query = Search.ts_query(filter_words, nil, "|")
|
||||||
|
|
||||||
# Exclude category definitions from similar topic suggestions
|
# Exclude category definitions from similar topic suggestions
|
||||||
|
@ -371,8 +372,7 @@ class Topic < ActiveRecord::Base
|
||||||
candidates = Topic.visible
|
candidates = Topic.visible
|
||||||
.secured(Guardian.new(user))
|
.secured(Guardian.new(user))
|
||||||
.listable_topics
|
.listable_topics
|
||||||
.joins('JOIN posts p ON p.topic_id = topics.id AND p.post_number = 1')
|
.joins('JOIN topic_search_data s ON topics.id = s.topic_id')
|
||||||
.joins('JOIN post_search_data s ON p.id = s.post_id')
|
|
||||||
.where("search_data @@ #{ts_query}")
|
.where("search_data @@ #{ts_query}")
|
||||||
.order("ts_rank(search_data, #{ts_query}) DESC")
|
.order("ts_rank(search_data, #{ts_query}) DESC")
|
||||||
.limit(SiteSetting.max_similar_results * 3)
|
.limit(SiteSetting.max_similar_results * 3)
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Migration that adds the topic_search_data table plus an index on its
# search_data column. `up` creates both; `down` drops the table.
class CreateTopicSearchIndex < ActiveRecord::Migration
|
||||||
|
def up
|
||||||
|
# used for similarity search
# id: false suppresses the default auto-generated id column; topic_id is
# declared as the primary key instead.
|
||||||
|
create_table :topic_search_data, id: false do |t|
|
||||||
|
t.integer :topic_id, null: false, primary_key: true
|
||||||
|
t.text :raw_data
|
||||||
|
t.string :locale, null: false
|
||||||
|
t.tsvector :search_data
|
||||||
|
end
|
||||||
|
|
||||||
|
# Raw SQL is used to create the index with the gin access method on the
# tsvector column.
execute "CREATE INDEX idx_search_topic ON topic_search_data USING gin (search_data)"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Reverses `up` by dropping the table; no separate statement removes the index.
def down
|
||||||
|
drop_table :topic_search_data
|
||||||
|
end
|
||||||
|
end
|
|
@ -55,6 +55,19 @@ class Search
|
||||||
SearchObserver.index(post)
|
SearchObserver.index(post)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
posts = Post.joins(:topic)
|
||||||
|
.where('posts.id IN (
|
||||||
|
SELECT p2.id FROM posts p2
|
||||||
|
LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id
|
||||||
|
WHERE pd.topic_id IS NULL AND p2.post_number = 1
|
||||||
|
)', SiteSetting.default_locale).limit(10000)
|
||||||
|
|
||||||
|
posts.each do |post|
|
||||||
|
# force indexing
|
||||||
|
post.cooked += " "
|
||||||
|
SearchObserver.index(post)
|
||||||
|
end
|
||||||
|
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ task "search:reindex" => :environment do
|
||||||
puts "Reindexing #{db}"
|
puts "Reindexing #{db}"
|
||||||
puts ""
|
puts ""
|
||||||
puts "Posts:"
|
puts "Posts:"
|
||||||
Post.exec_sql("select p.id, p.cooked, c.name category, t.title from
|
Post.exec_sql("select p.id, p.cooked, c.name category, t.title, p.post_number, t.id topic_id from
|
||||||
posts p
|
posts p
|
||||||
join topics t on t.id = p.topic_id
|
join topics t on t.id = p.topic_id
|
||||||
left join categories c on c.id = t.category_id
|
left join categories c on c.id = t.category_id
|
||||||
|
@ -12,7 +12,11 @@ task "search:reindex" => :environment do
|
||||||
cooked = p["cooked"]
|
cooked = p["cooked"]
|
||||||
title = p["title"]
|
title = p["title"]
|
||||||
category = p["cat"]
|
category = p["cat"]
|
||||||
|
post_number = p["post_number"].to_i
|
||||||
|
topic_id = p["topic_id"].to_i
|
||||||
|
|
||||||
SearchObserver.update_posts_index(post_id, cooked, title, category)
|
SearchObserver.update_posts_index(post_id, cooked, title, category)
|
||||||
|
SearchObserver.update_topics_index(topic_id, title , cooked) if post_number == 1
|
||||||
|
|
||||||
putc "."
|
putc "."
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue