FIX: Remove duplicate Emoji names from blurb

The blurb contained the values of both the alt and title attributes of emojis. Both values are always the same, so the emoji name appeared twice.
This commit is contained in:
Gerhard Schlager 2019-04-29 17:26:29 +02:00
parent 71d19f6e1f
commit 876c4f20b3
2 changed files with 10 additions and 0 deletions

View File

@ -212,6 +212,10 @@ class SearchIndexer
end
end
document.css("img[class='emoji']").each do |node|
node.remove_attribute("alt")
end
document.css("a[href]").each do |node|
if node["href"] == node.text || MENTION_CLASSES.include?(node["class"])
node.remove_attribute("href")

View File

@ -50,6 +50,12 @@ describe SearchIndexer do
expect(scrubbed).to eq('@автомобилист')
end
it 'extracts emoji name from emoji image' do
  # The image carries the emoji name in both `title` and `alt`;
  # the scrubbed text is expected to contain it exactly once.
  emoji_img = %Q|<img src="#{Discourse.base_url_no_prefix}/images/emoji/twitter/wink.png?v=9" title=":wink:" class="emoji" alt=":wink:">|

  expect(scrub(emoji_img)).to eq(':wink:')
end
it 'uses ignore_accent setting to strip diacritics' do
html = "<p>HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好</p>"