From c1cbf1b2690367b7417dd4e1f79ff98abae76d1e Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 8 Aug 2014 15:50:26 +1000 Subject: [PATCH] PERF: new table used for title similarity search --- app/models/search_observer.rb | 7 +++++++ app/models/topic.rb | 6 +++--- .../20140808051823_create_topic_search_index.rb | 17 +++++++++++++++++ lib/search.rb | 13 +++++++++++++ lib/tasks/search.rake | 6 +++++- 5 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 db/migrate/20140808051823_create_topic_search_index.rb diff --git a/app/models/search_observer.rb b/app/models/search_observer.rb index 89798a34ee9..24d8548bd2a 100644 --- a/app/models/search_observer.rb +++ b/app/models/search_observer.rb @@ -35,6 +35,11 @@ class SearchObserver < ActiveRecord::Observer # don't allow concurrency to mess up saving a post end + def self.update_topics_index(topic_id, title, cooked) + search_data = title.dup << " " << scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH] + update_index('topic', topic_id, search_data) + end + def self.update_posts_index(post_id, cooked, title, category) search_data = scrub_html_for_search(cooked) << " " << title search_data << " " << category if category @@ -55,6 +60,7 @@ class SearchObserver < ActiveRecord::Observer if obj.topic category_name = obj.topic.category.name if obj.topic.category SearchObserver.update_posts_index(obj.id, obj.cooked, obj.topic.title, category_name) + SearchObserver.update_topics_index(obj.topic_id, obj.topic.title, obj.cooked) if obj.post_number == 1 else Rails.logger.warn("Orphan post skipped in search_observer, topic_id: #{obj.topic_id} post_id: #{obj.id} raw: #{obj.raw}") end @@ -69,6 +75,7 @@ class SearchObserver < ActiveRecord::Observer if post category_name = obj.category.name if obj.category SearchObserver.update_posts_index(post.id, post.cooked, obj.title, category_name) + SearchObserver.update_topics_index(obj.id, obj.title, post.cooked) end end end diff --git a/app/models/topic.rb b/app/models/topic.rb index 5c5edfbf175..8acf4f971df 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -358,12 +358,13 @@ class Topic < ActiveRecord::Base archetype == Archetype.private_message end + MAX_SIMILAR_BODY_LENGTH = 200 # Search for similar topics def self.similar_to(title, raw, user=nil) return [] unless title.present? return [] unless raw.present? - filter_words = Search.prepare_data(title + " " + raw[0...200]); + filter_words = Search.prepare_data(title + " " + raw[0...MAX_SIMILAR_BODY_LENGTH]); ts_query = Search.ts_query(filter_words, nil, "|") # Exclude category definitions from similar topic suggestions @@ -371,8 +372,7 @@ class Topic < ActiveRecord::Base candidates = Topic.visible .secured(Guardian.new(user)) .listable_topics - .joins('JOIN posts p ON p.topic_id = topics.id AND p.post_number = 1') - .joins('JOIN post_search_data s ON p.id = s.post_id') + .joins('JOIN topic_search_data s ON topics.id = s.topic_id') .where("search_data @@ #{ts_query}") .order("ts_rank(search_data, #{ts_query}) DESC") .limit(SiteSetting.max_similar_results * 3) diff --git a/db/migrate/20140808051823_create_topic_search_index.rb b/db/migrate/20140808051823_create_topic_search_index.rb new file mode 100644 index 00000000000..1a154e6a082 --- /dev/null +++ b/db/migrate/20140808051823_create_topic_search_index.rb @@ -0,0 +1,17 @@ +class CreateTopicSearchIndex < ActiveRecord::Migration + def up + # used for similarity search + create_table :topic_search_data, id: false do |t| + t.integer :topic_id, null: false, primary_key: true + t.text :raw_data + t.string :locale, null: false + t.tsvector :search_data + end + + execute "CREATE INDEX idx_search_topic ON topic_search_data USING gin (search_data)" + end + + def down + drop_table :topic_search_data + end +end diff --git a/lib/search.rb b/lib/search.rb index 7a1423d73c7..19251398138 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -55,6 +55,19 @@ class Search SearchObserver.index(post) end + posts = Post.joins(:topic) + .where('posts.id IN ( + SELECT p2.id FROM posts p2 + LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id + WHERE pd.topic_id IS NULL AND p2.post_number = 1 + )', SiteSetting.default_locale).limit(10000) + + posts.each do |post| + # force indexing + post.cooked += " " + SearchObserver.index(post) + end + nil end diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake index db4835aae57..3d6483cad75 100644 --- a/lib/tasks/search.rake +++ b/lib/tasks/search.rake @@ -3,7 +3,7 @@ task "search:reindex" => :environment do puts "Reindexing #{db}" puts "" puts "Posts:" - Post.exec_sql("select p.id, p.cooked, c.name category, t.title from + Post.exec_sql("select p.id, p.cooked, c.name category, t.title, p.post_number, t.id topic_id from posts p join topics t on t.id = p.topic_id left join categories c on c.id = t.category_id @@ -12,7 +12,11 @@ task "search:reindex" => :environment do cooked = p["cooked"] title = p["title"] category = p["cat"] + post_number = p["post_number"].to_i + topic_id = p["topic_id"].to_i + SearchObserver.update_posts_index(post_id, cooked, title, category) + SearchObserver.update_topics_index(topic_id, title , cooked) if post_number == 1 putc "." end