FIX: remove diacritics instead of transliterating

This commit is contained in:
Régis Hanol 2018-08-24 00:38:44 +02:00
parent f4ae53d52b
commit bc7b530b0a
2 changed files with 5 additions and 3 deletions

View File

@ -190,8 +190,10 @@ class SearchIndexer
end
end
# Matches one combining mark from any of these code-point ranges:
#   U+0300-U+036F  Combining Diacritical Marks
#   U+1AB0-U+1AFF  Combining Diacritical Marks Extended
#   U+1DC0-U+1DFF  Combining Diacritical Marks Supplement
#   U+20D0-U+20FF  Combining Diacritical Marks for Symbols
# `||=` only assigns when the constant is not already set.
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
# Appends the stripped text of +string+ to +scrubbed+, padded with one space
# on each side.
#
# NOTE(review): the two appends below look like the removed and added sides of
# a diff whose -/+ markers were lost; presumably only the second one is meant
# to be live. Confirm against the repository before relying on this text.
def characters(string)
# Presumably the pre-change line: transliterates via ActiveSupport.
scrubbed << " #{ActiveSupport::Inflector.transliterate(string).strip} "
# Decompose to NFD so accents become separate combining marks, then delete the
# marks matched by DIACRITICS; the base characters are left in place.
scrubbed << " #{string.unicode_normalize(:nfd).gsub(DIACRITICS, "").strip} "
end
end
end

View File

@ -30,11 +30,11 @@ describe SearchIndexer do
end
# Checks the output of SearchIndexer::HtmlScrubber.scrub for accented Latin
# text mixed with Cyrillic, Arabic and Chinese text.
#
# NOTE(review): the paired `html =` and `expect` lines look like the removed
# and added sides of a diff whose -/+ markers were lost; the first of each pair
# is presumably the old version. Confirm against the repository.
it 'removes diacritics' do
html = "<p>Hétérogénéité</p>"
html = "<p>HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好</p>"
scrubbed = SearchIndexer::HtmlScrubber.scrub(html)
expect(scrubbed).to eq(" Heterogeneite ")
# "й" is expected to come back as "и": NFD splits it into "и" plus a combining
# breve (U+0306), which falls in the stripped U+0300-U+036F range. The Arabic
# and Chinese words are expected back unchanged.
expect(scrubbed).to eq(" HELLO Heterogeneite Здравствуите هتاف للترحيب 你好 ")
end
it 'correctly indexes a post according to version' do