FIX: remove diacritics instead of transliterating
This commit is contained in:
parent
f4ae53d52b
commit
bc7b530b0a
|
@ -190,8 +190,10 @@ class SearchIndexer
|
|||
end
|
||||
end
|
||||
|
||||
# Matches a single Unicode combining mark from any of these blocks:
#   U+0300-U+036F  Combining Diacritical Marks
#   U+1AB0-U+1AFF  Combining Diacritical Marks Extended
#   U+1DC0-U+1DFF  Combining Diacritical Marks Supplement
#   U+20D0-U+20FF  Combining Diacritical Marks for Symbols
# It only matches marks that stand as separate code points, so the input has
# to be in a decomposed form (e.g. NFD) for accented letters to be affected.
# `||=` leaves an already-defined constant untouched — presumably to avoid
# "already initialized constant" warnings on code reload; TODO confirm.
# NOTE(review): the capture group and alternation look unnecessary when the
# pattern is only used with gsub; a single character class would match the same.
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
|
||||
|
||||
def characters(string)
|
||||
scrubbed << " #{ActiveSupport::Inflector.transliterate(string).strip} "
|
||||
# Decompose to NFD so each accent becomes a separate combining mark, delete
# those marks via DIACRITICS, then strip and pad with single spaces before
# appending. Characters without combining marks (CJK, Arabic, most Cyrillic)
# pass through unchanged instead of being transliterated.
# NOTE(review): letters that decompose under NFD lose their mark in any
# script, e.g. Cyrillic "й" is indexed as "и"; the result is left in NFD form
# (not recomposed) — confirm both are acceptable for search.
scrubbed << " #{string.unicode_normalize(:nfd).gsub(DIACRITICS, "").strip} "
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -30,11 +30,11 @@ describe SearchIndexer do
|
|||
end
|
||||
|
||||
it 'removes diacritics' do
|
||||
html = "<p>Hétérogénéité</p>"
|
||||
html = "<p>HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好</p>"
|
||||
|
||||
scrubbed = SearchIndexer::HtmlScrubber.scrub(html)
|
||||
|
||||
expect(scrubbed).to eq(" Heterogeneite ")
|
||||
expect(scrubbed).to eq(" HELLO Heterogeneite Здравствуите هتاف للترحيب 你好 ")
|
||||
end
|
||||
|
||||
it 'correctly indexes a post according to version' do
|
||||
|
|
Loading…
Reference in New Issue