FIX: Remap postgres text search proximity operator (#25497)

Why this change?

Since 1dba1aca27, we have been remapping
the `<->` proximity operator in a tsquery to `&`. However, there is
another variant of it which follows the `<N>` pattern. For example, the
following text "end-to-end" will eventually result in the following
tsquery `end-to-end:* <-> end:* <2> end:*` being generated by Postgres.
Before this fix, the tsquery is remapped to `end-to-end:* & end:* <2>
end:*` by us. This requires the search data which we store to contain
two `end` terms exactly 2 positions apart. Due to the way we limit the
number of duplicates in our search data, the search term may end up not
matching anything. In bd32912c5e, we made
it such that we do not allow any duplicates when indexing a topic's
title. Therefore, search for `end-to-end` against a topic title with
`end-to-end` will never match because our index will only contain one
`end` term.

What does this change do?

We will remap the `<N>` variant of the proximity operator.
This commit is contained in:
Alan Guo Xiang Tan 2024-02-01 07:20:46 +08:00 committed by GitHub
parent f2ac9e4c12
commit e61608d080
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 3 deletions

View File

@ -1333,7 +1333,7 @@ class Search
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})"
# PG 14 and up default to using the followed by operator
# this restores the old behavior
tsquery = "REPLACE(#{tsquery}::text, '<->', '&')::tsquery"
tsquery = "REGEXP_REPLACE(#{tsquery}::text, '<->|<\\d+>', '&', 'g')::tsquery"
tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner
tsquery
end

View File

@ -1025,6 +1025,14 @@ RSpec.describe Search do
results = Search.execute("tiger", guardian: Guardian.new(user))
expect(results.posts).to eq([post])
end
# A hyphenated term such as "end-to-end" makes Postgres emit proximity
# operators in the tsquery; the search must still match the topic even
# though its title is indexed without duplicate terms.
it "does not rely on postgres's proximity operators" do
  topic.update!(title: "End-to-end something something testing")
  results = Search.execute("end-to-end test")
  expect(results.posts).to eq([post])
end
end
describe "topics" do
@ -2375,13 +2383,19 @@ RSpec.describe Search do
it "escapes the term correctly" do
expect(Search.ts_query(term: 'Title with trailing backslash\\')).to eq(
"REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->', '&')::tsquery",
"REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->|<\\d+>', '&', 'g')::tsquery",
)
expect(Search.ts_query(term: "Title with trailing quote'")).to eq(
"REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->', '&')::tsquery",
"REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->|<\\d+>', '&', 'g')::tsquery",
)
end
# Executes the generated expression in the database and checks the textual
# tsquery: every operator between the lexemes of "end-to-end" should come
# back as `&`, with no `<->` or `<N>` proximity operator left over.
it "remaps postgres's proximity operators '<->' and its `<N>` variant" do
expect(
DB.query_single("SELECT #{Search.ts_query(term: "end-to-end")}::text"),
).to contain_exactly("'end-to-end':* & 'end':* & 'end':*")
end
end
describe "#word_to_date" do