diff --git a/app/models/topic.rb b/app/models/topic.rb
index de3034e3249..883a35c4651 100644
--- a/app/models/topic.rb
+++ b/app/models/topic.rb
@@ -581,9 +581,24 @@ class Topic < ActiveRecord::Base
     return [] if title.blank?
     raw = raw.presence || ""
 
-    search_data = "#{title} #{raw[0...MAX_SIMILAR_BODY_LENGTH]}".strip
-    filter_words = Search.prepare_data(search_data)
-    ts_query = Search.ts_query(term: filter_words, joiner: "|")
+    # Title terms are limited to weight A and body terms to weight B so titles
+    # match titles and bodies match bodies (assumes that is how search_data is weighted).
+    title_tsquery = Search.set_tsquery_weight_filter(
+      Search.prepare_data(title.strip),
+      'A'
+    )
+
+    tsquery_term = title_tsquery
+    raw_data = Search.prepare_data(raw[0...MAX_SIMILAR_BODY_LENGTH].strip)
+
+    # A blank body would add an empty quoted operand ('':*B), which PostgreSQL
+    # rejects as a tsquery syntax error, so the body is only added when present.
+    if raw_data.present?
+      raw_tsquery = Search.set_tsquery_weight_filter(raw_data, 'B')
+      tsquery_term = "#{title_tsquery} & #{raw_tsquery}"
+    end
+
+    tsquery = Search.to_tsquery(term: tsquery_term, joiner: "|")
 
     candidates = Topic
       .visible
@@ -591,9 +606,9 @@ class Topic < ActiveRecord::Base
       .secured(Guardian.new(user))
       .joins("JOIN topic_search_data s ON topics.id = s.topic_id")
       .joins("LEFT JOIN categories c ON topics.id = c.topic_id")
-      .where("search_data @@ #{ts_query}")
+      .where("search_data @@ #{tsquery}")
       .where("c.topic_id IS NULL")
-      .order("ts_rank(search_data, #{ts_query}) DESC")
+      .order("ts_rank(search_data, #{tsquery}) DESC")
       .limit(SiteSetting.max_similar_results * 3)
 
     candidate_ids = candidates.pluck(:id)
diff --git a/lib/search.rb b/lib/search.rb
index 02462cded77..ade1ee3dae0 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -1025,13 +1025,33 @@ class Search
   end
 
   def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil)
+    to_tsquery(
+      ts_config: ts_config,
+      term: set_tsquery_weight_filter(term, weight_filter),
+      joiner: joiner
+    )
+  end
+
+  # Builds a TO_TSQUERY SQL fragment from an already formatted tsquery
+  # expression. `term` is interpolated into the SQL string literal as-is, so it
+  # must come from set_tsquery_weight_filter (which does the escaping) or be
+  # escaped by the caller. When `joiner` is given, every '&' in the resulting
+  # tsquery text is replaced with the escaped joiner.
+  def self.to_tsquery(ts_config: nil, term:, joiner: nil)
     ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
-    term = term.gsub("'", "''")
-    tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '''#{PG::Connection.escape_string(term)}'':*#{weight_filter}')"
+    tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{term}')"
     tsquery = "REPLACE(#{tsquery}::text, '&', '#{PG::Connection.escape_string(joiner)}')::tsquery" if joiner
     tsquery
   end
 
+  # Doubles the single quotes in `term`, escapes it, wraps it in doubled single
+  # quotes (a quoted tsquery operand once placed inside a SQL string literal)
+  # and appends the prefix marker `:*` plus the optional weight filter (e.g. 'A').
+  def self.set_tsquery_weight_filter(term, weight_filter)
+    term = term.gsub("'", "''")
+    "''#{PG::Connection.escape_string(term)}'':*#{weight_filter}"
+  end
+
   def ts_query(ts_config = nil, weight_filter: nil)
     @ts_query_cache ||= {}
     @ts_query_cache["#{ts_config || default_ts_config} #{@term} #{weight_filter}"] ||=
diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb
index b8be9f34684..29af224906e 100644
--- a/spec/models/topic_spec.rb
+++ b/spec/models/topic_spec.rb
@@ -502,37 +502,50 @@ describe Topic do
     end
   end
 
-  context 'similar_to' do
+  context '.similar_to' do
+    fab!(:category) { Fabricate(:category_with_definition) }
 
-    it 'returns blank with nil params' do
-      expect(Topic.similar_to(nil, nil)).to be_blank
+    it 'returns an empty array with nil params' do
+      expect(Topic.similar_to(nil, nil)).to eq([])
     end
 
     context "with a category definition" do
-      let!(:category) { Fabricate(:category_with_definition) }
-
       it "excludes the category definition topic from similar_to" do
-        expect(Topic.similar_to('category definition for', "no body")).to be_blank
+        expect(Topic.similar_to('category definition for', "no body")).to eq([])
       end
     end
 
     context 'with a similar topic' do
-      let!(:topic) {
+      fab!(:post) {
         SearchIndexer.enable
-        post = create_post(title: "Evil trout is the dude who posted this topic")
-        post.topic
+        create_post(title: "Evil trout is the dude who posted this topic")
       }
 
+      let(:topic) { post.topic }
+
+      before do
+        SearchIndexer.enable
+      end
+
       it 'returns the similar topic if the title is similar' do
         expect(Topic.similar_to("has evil trout made any topics?", "i am wondering has evil trout made any topics?")).to eq([topic])
       end
 
-      context "secure categories" do
-        fab!(:category) { Fabricate(:category_with_definition, read_restricted: true) }
+      it 'matches title against title and raw against raw when searching for topics' do
+        topic.update!(title: '1 2 3 numbered titles')
+        post.update!(raw: 'random toy poodle')
 
+        expect(Topic.similar_to("unrelated term", "1 2 3 poddle")).to eq([])
+      end
+
+      it 'does not raise an error when raw is blank' do
+        expect { Topic.similar_to("has evil trout made any topics?", "") }.not_to raise_error
+      end
+
+      context "secure categories" do
         before do
-          topic.category = category
-          topic.save
+          category.update!(read_restricted: true)
+          topic.update!(category: category)
         end
 
         it "doesn't return topics from private categories" do