FIX: Improve `Topic.similar_to` with better `Topic#title` matches.
This changes the PostgreSQL text search so that the given title is only matched against lexemes that were formed from a topic's title. Likewise, the given raw (post body) is only matched against lexemes that were formed from the post's raw.
This commit is contained in:
parent
14003abc37
commit
597d542c33
|
@ -581,9 +581,17 @@ class Topic < ActiveRecord::Base
|
|||
return [] if title.blank?
|
||||
raw = raw.presence || ""
|
||||
|
||||
search_data = "#{title} #{raw[0...MAX_SIMILAR_BODY_LENGTH]}".strip
|
||||
filter_words = Search.prepare_data(search_data)
|
||||
ts_query = Search.ts_query(term: filter_words, joiner: "|")
|
||||
title_tsquery = Search.set_tsquery_weight_filter(
|
||||
Search.prepare_data(title.strip),
|
||||
'A'
|
||||
)
|
||||
|
||||
raw_tsquery = Search.set_tsquery_weight_filter(
|
||||
Search.prepare_data(raw[0...MAX_SIMILAR_BODY_LENGTH].strip),
|
||||
'B'
|
||||
)
|
||||
|
||||
tsquery = Search.to_tsquery(term: "#{title_tsquery} & #{raw_tsquery}", joiner: "|")
|
||||
|
||||
candidates = Topic
|
||||
.visible
|
||||
|
@ -591,9 +599,9 @@ class Topic < ActiveRecord::Base
|
|||
.secured(Guardian.new(user))
|
||||
.joins("JOIN topic_search_data s ON topics.id = s.topic_id")
|
||||
.joins("LEFT JOIN categories c ON topics.id = c.topic_id")
|
||||
.where("search_data @@ #{ts_query}")
|
||||
.where("search_data @@ #{tsquery}")
|
||||
.where("c.topic_id IS NULL")
|
||||
.order("ts_rank(search_data, #{ts_query}) DESC")
|
||||
.order("ts_rank(search_data, #{tsquery}) DESC")
|
||||
.limit(SiteSetting.max_similar_results * 3)
|
||||
|
||||
candidate_ids = candidates.pluck(:id)
|
||||
|
|
|
@ -1025,13 +1025,25 @@ class Search
|
|||
end
|
||||
|
||||
# Builds a tsquery SQL fragment for `term`.
#
# The term is first wrapped by set_tsquery_weight_filter (which applies the
# optional `weight_filter`), and the result is handed to to_tsquery together
# with `ts_config` and `joiner`. Returns whatever to_tsquery returns.
def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil)
|
||||
to_tsquery(
|
||||
ts_config: ts_config,
|
||||
term: set_tsquery_weight_filter(term, weight_filter),
|
||||
joiner: joiner
|
||||
)
|
||||
end
|
||||
|
||||
# Returns a SQL expression string that calls TO_TSQUERY on `term`, using the
# connection-quoted `ts_config` when one is given and default_ts_config
# otherwise. When `joiner` is given, the expression is wrapped in
# REPLACE(...) so that '&' in the tsquery's text form is replaced by the
# escaped joiner before the value is cast back to tsquery.
#
# NOTE(review): this span appears to interleave removed and added diff lines.
# The first TO_TSQUERY assignment references `weight_filter`, which is not a
# parameter of this method, and its result is immediately overwritten by the
# second assignment. Confirm against the real file before relying on it.
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
|
||||
ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
|
||||
term = term.gsub("'", "''")
|
||||
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '''#{PG::Connection.escape_string(term)}'':*#{weight_filter}')"
|
||||
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{term}')"
|
||||
# Swap the '&' operator in the generated tsquery text for the caller's joiner.
tsquery = "REPLACE(#{tsquery}::text, '&', '#{PG::Connection.escape_string(joiner)}')::tsquery" if joiner
|
||||
tsquery
|
||||
end
|
||||
|
||||
# Formats `term` as a quoted tsquery operand string.
#
# Single quotes in the term are doubled, the result is passed through
# PG::Connection.escape_string, and the escaped term is wrapped in doubled
# single quotes followed by ":*" and `weight_filter` (nothing is appended
# after ":*" when weight_filter is nil).
# The doubled outer quotes presumably exist because the result is embedded in
# a single-quoted SQL literal by to_tsquery — confirm against the callers.
def self.set_tsquery_weight_filter(term, weight_filter)
|
||||
term = term.gsub("'", "''")
|
||||
"''#{PG::Connection.escape_string(term)}'':*#{weight_filter}"
|
||||
end
|
||||
|
||||
def ts_query(ts_config = nil, weight_filter: nil)
|
||||
@ts_query_cache ||= {}
|
||||
@ts_query_cache["#{ts_config || default_ts_config} #{@term} #{weight_filter}"] ||=
|
||||
|
|
|
@ -502,37 +502,46 @@ describe Topic do
|
|||
end
|
||||
end
|
||||
|
||||
context 'similar_to' do
|
||||
context '.similar_to' do
|
||||
fab!(:category) { Fabricate(:category_with_definition) }
|
||||
|
||||
it 'returns blank with nil params' do
|
||||
expect(Topic.similar_to(nil, nil)).to be_blank
|
||||
it 'returns an empty array with nil params' do
|
||||
expect(Topic.similar_to(nil, nil)).to eq([])
|
||||
end
|
||||
|
||||
context "with a category definition" do
|
||||
let!(:category) { Fabricate(:category_with_definition) }
|
||||
|
||||
it "excludes the category definition topic from similar_to" do
|
||||
expect(Topic.similar_to('category definition for', "no body")).to be_blank
|
||||
expect(Topic.similar_to('category definition for', "no body")).to eq([])
|
||||
end
|
||||
end
|
||||
|
||||
context 'with a similar topic' do
|
||||
let!(:topic) {
|
||||
fab!(:post) {
|
||||
SearchIndexer.enable
|
||||
post = create_post(title: "Evil trout is the dude who posted this topic")
|
||||
post.topic
|
||||
create_post(title: "Evil trout is the dude who posted this topic")
|
||||
}
|
||||
|
||||
let(:topic) { post.topic }
|
||||
|
||||
before do
|
||||
SearchIndexer.enable
|
||||
end
|
||||
|
||||
it 'returns the similar topic if the title is similar' do
|
||||
expect(Topic.similar_to("has evil trout made any topics?", "i am wondering has evil trout made any topics?")).to eq([topic])
|
||||
end
|
||||
|
||||
context "secure categories" do
|
||||
fab!(:category) { Fabricate(:category_with_definition, read_restricted: true) }
|
||||
it 'matches title against title and raw against raw when searching for topics' do
|
||||
topic.update!(title: '1 2 3 numbered titles')
|
||||
post.update!(raw: 'random toy poodle')
|
||||
|
||||
expect(Topic.similar_to("unrelated term", "1 2 3 poddle")).to eq([])
|
||||
end
|
||||
|
||||
context "secure categories" do
|
||||
before do
|
||||
topic.category = category
|
||||
topic.save
|
||||
category.update!(read_restricted: true)
|
||||
topic.update!(category: category)
|
||||
end
|
||||
|
||||
it "doesn't return topics from private categories" do
|
||||
|
|
Loading…
Reference in New Issue