FIX: Make sure generated tsqueries are valid (#19368)

The tsquery used for searching is generated using functions from both
Ruby and PostgreSQL (for example, the unaccent function). Depending on the
term used, this could produce an invalid tsquery. For example, "can’t"
generated "''can''t''" instead of "''can''''t''".
This commit is contained in:
Bianca Nenciu 2022-12-12 17:57:20 +02:00 committed by GitHub
parent 19214aff18
commit 17b7ab0d7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 14 deletions

View File

@ -1168,28 +1168,18 @@ class Search
self.class.default_ts_config
end
# Builds the SQL tsquery expression for a search term.
#
# term          - the search term; combined with weight_filter through
#                 set_tsquery_weight_filter before conversion.
# ts_config     - optional text search configuration, forwarded to to_tsquery.
# joiner        - optional joiner, forwarded to to_tsquery.
# weight_filter - optional weight filter applied to the term.
#
# Returns whatever to_tsquery returns for the filtered term.
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil)
  to_tsquery(
    ts_config: ts_config,
    term: set_tsquery_weight_filter(term, weight_filter),
    joiner: joiner
  )
end
# Builds a SQL fragment that converts a term into a tsquery.
#
# ts_config - optional text search configuration name; quoted through the
#             ActiveRecord connection when given, otherwise the value of
#             default_ts_config is interpolated as-is.
# term      - the term to convert; escaped with escape_string and passed
#             through wrap_unaccent before being handed to TO_TSQUERY.
# joiner    - optional operator; when present, every '&' in the text form of
#             the tsquery is replaced with the escaped joiner.
#
# Returns the SQL expression as a String.
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
  ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config

  # The term takes the same escape + wrap_unaccent path whether or not a
  # joiner is given; the joiner only post-processes the resulting tsquery.
  escaped_term = wrap_unaccent("'#{escape_string(term)}'")
  tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})"
  tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner
  tsquery
end
@ -1198,6 +1188,10 @@ class Search
end
# Escapes a term so it can be embedded in a quoted SQL string literal.
#
# HACK: The term has to be "unaccented" before it is escaped, or the
# resulting tsqueries will be invalid. When search_ignore_accents is enabled,
# the right single quotation mark (U+2019) is therefore replaced with a plain
# apostrophe up front.
def self.escape_string(term)
  normalized =
    if SiteSetting.search_ignore_accents
      term.gsub("\u{2019}", "'")
    else
      term
    end

  PG::Connection.escape_string(normalized).gsub('\\', '\\\\\\')
end

View File

@ -115,6 +115,37 @@ RSpec.describe Search do
end
end
# Searching with a straight apostrophe, a typographic apostrophe (U+2019) or
# no apostrophe at all is expected to match both posts, whether or not
# accents are ignored.
context "with apostrophes" do
  fab!(:post_1) { Fabricate(:post, raw: "searching for: John's") }
  fab!(:post_2) { Fabricate(:post, raw: "searching for: Johns") }

  before do
    SearchIndexer.enable
  end

  after do
    SearchIndexer.disable
  end

  it "returns correct results" do
    SiteSetting.search_ignore_accents = false
    [post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) }

    expect(Search.execute("John's").posts).to contain_exactly(post_1, post_2)
    # U+2019 is the character that previously produced an invalid tsquery.
    expect(Search.execute("John\u{2019}s").posts).to contain_exactly(post_1, post_2)
    expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2)
  end

  it "returns correct results with accents" do
    SiteSetting.search_ignore_accents = true
    [post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) }

    expect(Search.execute("John's").posts).to contain_exactly(post_1, post_2)
    # U+2019 is the character that previously produced an invalid tsquery.
    expect(Search.execute("John\u{2019}s").posts).to contain_exactly(post_1, post_2)
    expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2)
  end
end
describe "custom_eager_load" do
fab!(:topic) { Fabricate(:topic) }
fab!(:post) { Fabricate(:post, topic: topic) }