Correct a regression when searching with diacritics
This commit is contained in:
parent
29315b73c2
commit
ac11f8df52
|
@ -166,6 +166,14 @@ class SearchIndexer
|
|||
end
|
||||
|
||||
class HtmlScrubber < Nokogiri::XML::SAX::Document
|
||||
|
||||
# Returns a copy of +str+ with combining diacritical marks removed and
# surrounding whitespace stripped.
#
# The string is first decomposed with Unicode NFKD normalization so that
# accented characters become a base character followed by combining marks;
# every character matching DIACRITICS is then deleted.
#
# The non-mutating gsub/strip forms are used deliberately:
# String#unicode_normalize can hand back the receiver itself (it does so for
# US-ASCII encoded strings), so in-place gsub!/strip! could modify the
# caller's string or raise FrozenError when that string is frozen.
#
# @param str [String] text to clean; never modified
# @return [String] a new string without diacritics or surrounding whitespace
def self.strip_diacritics(str)
  str.unicode_normalize(:nfkd).gsub(DIACRITICS, "").strip
end
|
||||
|
||||
attr_reader :scrubbed
|
||||
|
||||
def initialize
|
||||
|
@ -192,8 +200,8 @@ class SearchIndexer
|
|||
|
||||
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
|
||||
|
||||
def characters(string)
|
||||
scrubbed << " #{string.unicode_normalize(:nfkd).gsub(DIACRITICS, "").strip} "
|
||||
def characters(str)
|
||||
scrubbed << " #{HtmlScrubber.strip_diacritics(str)} "
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -132,11 +132,14 @@ class Search
|
|||
@valid = true
|
||||
@page = @opts[:page]
|
||||
|
||||
term = term.to_s.dup
|
||||
|
||||
# Removes any zero-width characters from search terms
|
||||
term.to_s.gsub!(/[\u200B-\u200D\uFEFF]/, '')
|
||||
term.gsub!(/[\u200B-\u200D\uFEFF]/, '')
|
||||
# Replace curly quotes with regular quotes
|
||||
term.to_s.gsub!(/[\u201c\u201d]/, '"')
|
||||
@clean_term = term.to_s.dup
|
||||
term.gsub!(/[\u201c\u201d]/, '"')
|
||||
|
||||
@clean_term = term
|
||||
|
||||
term = process_advanced_search!(term)
|
||||
|
||||
|
@ -825,9 +828,10 @@ class Search
|
|||
end
|
||||
|
||||
def ts_query(ts_config = nil, weight_filter: nil)
|
||||
# the indexer strips diacritics from indexed text, so we must strip them from the term too, otherwise we will get no matches
|
||||
@ts_query_cache ||= {}
|
||||
@ts_query_cache["#{ts_config || default_ts_config} #{@term} #{weight_filter}"] ||=
|
||||
Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter)
|
||||
Search.ts_query(term: SearchIndexer::HtmlScrubber.strip_diacritics(@term), ts_config: ts_config, weight_filter: weight_filter)
|
||||
end
|
||||
|
||||
def wrap_rows(query)
|
||||
|
|
|
@ -990,6 +990,24 @@ describe Search do
|
|||
end
|
||||
end
|
||||
|
||||
context 'diacritics' do
|
||||
let!(:post1) { Fabricate(:post, raw: 'สวัสดี Régis hello') }
|
||||
|
||||
it ('allows strips correctly') do
|
||||
results = Search.execute('hello', type_filter: 'topic')
|
||||
expect(results.posts.length).to eq(1)
|
||||
|
||||
results = Search.execute('regis', type_filter: 'topic')
|
||||
expect(results.posts.length).to eq(1)
|
||||
|
||||
results = Search.execute('Régis', type_filter: 'topic')
|
||||
expect(results.posts.length).to eq(1)
|
||||
|
||||
results = Search.execute('สวัสดี', type_filter: 'topic')
|
||||
expect(results.posts.length).to eq(1)
|
||||
end
|
||||
end
|
||||
|
||||
context 'pagination' do
|
||||
let(:number_of_results) { 2 }
|
||||
let!(:post1) { Fabricate(:post, raw: 'hello hello hello hello hello') }
|
||||
|
|
Loading…
Reference in New Issue